def match_anchor_to_bbox(ground_truth, anchors, device, iou_threshold=0.5):
    """Assign ground-truth bounding boxes to anchor boxes similar to them."""
    num_anchors, num_gt_boxes = anchors.shape[0], ground_truth.shape[0]
    # Element `x_ij` in the `i^th` row and `j^th` column is the IoU
    # of the anchor box `anc_i` to the ground-truth bounding box `box_j`
    jaccard = box_iou(anchors, ground_truth)
    # Initialize the tensor to hold the assigned ground-truth bbox for each anchor
    anchors_bbox_map = np.full((num_anchors,), fill_value=-1, dtype=np.int32,
                               ctx=device)
    # Assign ground-truth bounding boxes according to the threshold
    max_ious, indices = np.max(jaccard, axis=1), np.argmax(jaccard, axis=1)
    anc_i = np.nonzero(max_ious >= iou_threshold)[0]
    box_j = indices[max_ious >= iou_threshold]
    anchors_bbox_map[anc_i] = box_j
    # Find the largest IoU for each ground-truth bbox
    col_discard = np.full((num_anchors,), -1)
    row_discard = np.full((num_gt_boxes,), -1)
    for _ in range(num_gt_boxes):
        max_idx = np.argmax(jaccard)  # index into the flattened IoU matrix
        box_idx = (max_idx % num_gt_boxes).astype('int32')
        anc_idx = (max_idx / num_gt_boxes).astype('int32')
        anchors_bbox_map[anc_idx] = box_idx
        jaccard[:, box_idx] = col_discard
        jaccard[anc_idx, :] = row_discard
    return anchors_bbox_map
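# Added usage sketch (not part of the original code): assumes MXNet's numpy
# interface and the same `box_iou` helper as above are in scope. Two anchors
# coincide with the two ground-truth boxes; the third overlaps neither strongly,
# so it should stay unassigned (-1).
from mxnet import np
import mxnet as mx

anchors = np.array([[0.0, 0.0, 0.5, 0.5],     # same as gt box 0
                    [0.5, 0.5, 1.0, 1.0],     # same as gt box 1
                    [0.0, 0.45, 0.5, 0.95]])  # IoU with gt box 0 is only ~0.05
ground_truth = np.array([[0.0, 0.0, 0.5, 0.5],
                         [0.5, 0.5, 1.0, 1.0]])
anchors_bbox_map = match_anchor_to_bbox(ground_truth, anchors, mx.cpu())
# expected assignment: [0, 1, -1]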
def test_full():
    data1 = np.full((INT_OVERFLOW, 2), np.array([1, 2]))
    assert data1.shape == (INT_OVERFLOW, 2)
    assert data1[-1, 0] == 1 and data1[-1, 1] == 2
    data2 = np.full((2, INT_OVERFLOW), 3)
    assert data2.shape == (2, INT_OVERFLOW)
    assert data2[-1, -1] == 3
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0,
                                     (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0,
                                     (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = np.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)

    # Bounds (0->0, 255->1)
    data_in = np.zeros((10, 20, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert same(out_nd.asnumpy(),
                np.transpose(np.zeros(data_in.shape, dtype=np.float32), (2, 0, 1)))

    data_in = np.full((10, 20, 3), 255).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert same(out_nd.asnumpy(),
                np.transpose(np.ones(data_in.shape, dtype=np.float32), (2, 0, 1)))
def _get_vocab_slice_ids(restrict_lexicon: Optional[lexicon.TopKLexicon],
                         source_words: np.ndarray,
                         raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                         eos_id: int,
                         beam_size: int) -> Tuple[np.ndarray, int, List[Optional[constrained.RawConstraintList]]]:
    vocab_slice_ids = np.array(restrict_lexicon.get_trg_ids(source_words.astype("int32", copy=False).asnumpy()),
                               dtype='int32')
    ctx = source_words.ctx
    if any(raw_constraint_list):
        # Add the constraint IDs to the list of permissible IDs, and then project them into the reduced space
        constraint_ids = np.array([word_id for sent in raw_constraint_list for phr in sent for word_id in phr])
        vocab_slice_ids = onp.lib.arraysetops.union1d(vocab_slice_ids, constraint_ids)  # type: ignore
        full_to_reduced = dict((val, i) for i, val in enumerate(vocab_slice_ids))
        raw_constraint_list = [[[full_to_reduced[x] for x in phr] for phr in sent]
                               for sent in raw_constraint_list]

    # pad to a multiple of 8.
    vocab_slice_ids = np.pad(vocab_slice_ids, (0, 7 - ((len(vocab_slice_ids) - 1) % 8)),
                             mode='constant', constant_values=eos_id)

    vocab_slice_ids_shape = vocab_slice_ids.shape[0]
    if vocab_slice_ids_shape < beam_size + 1:
        # This fixes an edge case for toy models, where the number of vocab ids from the lexicon is
        # smaller than the beam size.
        logger.warning("Padding vocab_slice_ids (%d) with EOS to have at least %d+1 elements to expand",
                       vocab_slice_ids_shape, beam_size)
        n = beam_size - vocab_slice_ids_shape + 1
        vocab_slice_ids = np.concatenate((vocab_slice_ids,
                                          np.full((n,), fill_value=eos_id, ctx=ctx, dtype='int32')),
                                         axis=0)

    logger.debug(f'decoder softmax size: {vocab_slice_ids_shape}')
    return vocab_slice_ids, vocab_slice_ids_shape, raw_constraint_list
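# Added illustration (not from the original module) of the "pad to a multiple of 8"
# arithmetic above: the pad width 7 - ((n - 1) % 8) rounds a slice of length n up
# to the next multiple of 8 and adds nothing when n is already a multiple of 8.
for n in (1, 7, 8, 9, 16):
    padded_len = n + 7 - ((n - 1) % 8)
    assert padded_len % 8 == 0 and padded_len - n < 8
# n = 1, 7, 8 -> 8;  n = 9, 16 -> 16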
def test_prevent_unk_update_scores():
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search

    vocab_size = 10
    batch_beam_size = 3
    us = sockeye.beam_search.UpdateScores()
    pad_dist = np.full((batch_beam_size, vocab_size - 1), fill_value=np.inf, dtype='float32')
    eos_dist = np.full((batch_beam_size, vocab_size), fill_value=np.inf, dtype='float32')
    eos_dist[:, C.EOS_ID] = 0
    unk_dist = np.zeros_like(eos_dist)
    unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

    lengths = np.array([[0], [1], [0]], dtype='int32')
    max_lengths = np.array([[1], [2], [3]], dtype='int32')  # first one reaches max length
    scores_accumulated = np.ones((3, 1), dtype='float32')
    finished = np.array([[0],   # not finished
                         [1],   # finished
                         [0]],  # not finished
                        dtype='int32')
    inactive = np.zeros_like(finished)
    target_dists = np.random.uniform(0, 1, (3, vocab_size))

    scores, lengths = us(target_dists, finished, inactive, scores_accumulated,
                         lengths, max_lengths, unk_dist, pad_dist, eos_dist)

    lengths = lengths.reshape((-1,))
    assert (lengths == np.array([[1], [1], [1]])).all()  # all lengths but finished updated + 1
    assert (scores[0] == (1. + target_dists[0] + eos_dist)).all()  # 1 reached max length, force eos
    assert (scores[1] == np.array([1.] + pad_dist[1].tolist())).all()  # 2 finished, force pad, keep score
    assert scores[2, C.UNK_ID] == np.inf  # 3 scores of <unk> should be np.inf
    assert (scores[2] == (1. + target_dists[2] + unk_dist[2])).all()  # 3 scores + previous scores
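# Added sketch (plain NumPy, not part of the test) spelling out the lengths
# assertion above: lengths start at [0, 1, 0], finished is [0, 1, 0], and only
# unfinished rows advance by one step, giving [1, 1, 1].
import numpy as onp

start_lengths = onp.array([0, 1, 0])
finished_rows = onp.array([0, 1, 0])
assert (start_lengths + (1 - finished_rows) == onp.array([1, 1, 1])).all()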
def test_softmax():
    input_data = np.ones((SMALL_Y, LARGE_X))
    for axis in [0, 1]:
        true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis]))
        output = npx.softmax(input_data, axis=axis)
        assert_almost_equal(output.asnumpy(), true_output, rtol=1e-5, atol=1e-5)
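# Added standalone check (plain NumPy, not part of the test): softmax of a
# constant input is uniform along the reduced axis, which is exactly how
# `true_output` is constructed above.
import numpy as onp

x = onp.ones((2, 4))
softmax = onp.exp(x) / onp.exp(x).sum(axis=1, keepdims=True)
assert onp.allclose(softmax, 1.0 / x.shape[1])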
def test_power():
    A = np.full((2, INT_OVERFLOW), 2)
    B = np.ones((2, INT_OVERFLOW))
    B[-1, -1] = 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.power(A, B)
    C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 8
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == 12
    assert B.grad.shape == B.shape
    assert_almost_equal(B.grad[-1, -1], 2**3 * np.log(2), rtol=1e-5, atol=1e-5)
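# Added worked check (pure Python, not part of the test) for the gradient values
# asserted above: with C = A**B, dC/dA = B * A**(B-1) and dC/dB = A**B * log(A).
# At A = 2, B = 3 this gives dC/dA = 3 * 4 = 12 and dC/dB = 8 * log(2).
import math

A_val, B_val = 2.0, 3.0
assert B_val * A_val ** (B_val - 1) == 12.0
assert math.isclose(A_val ** B_val * math.log(A_val), 2 ** 3 * math.log(2))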
def forward(self,
            source: np.ndarray,
            source_length: np.ndarray,
            restrict_lexicon: Optional[lexicon.TopKLexicon],
            raw_constraint_list: List[Optional[constrained.RawConstraintList]],
            raw_avoid_list: List[Optional[constrained.RawConstraintList]],
            max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                      np.ndarray,
                                                      np.ndarray,
                                                      np.ndarray,
                                                      List[Optional[np.ndarray]],
                                                      List[Optional[constrained.ConstrainedHypothesis]]]:
    """
    Translates multiple sentences using beam search.

    :param source: Source ids. Shape: (batch_size, bucket_key, num_factors).
    :param source_length: Valid source lengths. Shape: (batch_size,).
    :param restrict_lexicon: Lexicon to use for vocabulary restriction.
    :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must appear in each output.
    :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must NOT appear in each output.
    :param max_output_lengths: ndarray of maximum output lengths per input in source.
           Shape: (batch_size,). Dtype: int32.
    :return List of best hypotheses indices, list of best word indices,
            array of accumulated length-normalized negative log-probs, hypotheses lengths,
            predicted lengths of references (if any), constraints (if any).
    """
    batch_size = source.shape[0]
    logger.debug("beam_search batch size: %d", batch_size)

    # Maximum beam search iterations (determined by longest input with eos)
    max_iterations = max_output_lengths.max().item()
    logger.debug("max beam search iterations: %d", max_iterations)

    sample_best_hyp_indices = None
    if self._sample is not None:
        utils.check_condition(restrict_lexicon is None,
                              "Sampling is not available when working with a restricted lexicon.")
        sample_best_hyp_indices = np.arange(0, batch_size * self.beam_size, dtype='int32', ctx=self.context)

    # General data structure: batch_size * beam_size blocks in total;
    # a full beam for each sentence, followed by the next beam-block for the next sentence and so on

    # best word_indices (also act as input). Shape: (batch * beam, num_target_factors)
    best_word_indices = np.full((batch_size * self.beam_size, self.num_target_factors),
                                fill_value=self.bos_id, ctx=self.context, dtype='int32')

    # offset for hypothesis indices in batch decoding
    offset = np.repeat(np.arange(0, batch_size * self.beam_size, self.beam_size,
                                 dtype='int32', ctx=self.context), self.beam_size)

    # locations of each batch item when first dimension is (batch * beam)
    batch_indices = np.arange(0, batch_size * self.beam_size, self.beam_size, dtype='int32', ctx=self.context)
    first_step_mask = np.full((batch_size * self.beam_size, 1),
                              fill_value=np.inf, ctx=self.context, dtype=self.dtype)
    first_step_mask[batch_indices] = 0.0

    # Best word and hypotheses indices across beam search steps from topk operation.
    best_hyp_indices_list = []  # type: List[np.ndarray]
    best_word_indices_list = []  # type: List[np.ndarray]

    lengths = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')
    finished = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')

    # Extending max_output_lengths to shape (batch_size * beam_size, 1)
    max_output_lengths = np.repeat(np.expand_dims(max_output_lengths, axis=1), self.beam_size, axis=0)

    # scores_accumulated: chosen smallest scores in scores (ascending).
    scores_accumulated = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype=self.dtype)

    output_vocab_size = self.output_vocab_size

    # If using a top-k lexicon, select param rows for logit computation that correspond to the
    # target vocab for this sentence.
    vocab_slice_ids = None  # type: Optional[np.ndarray]
    if restrict_lexicon:
        source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
        vocab_slice_ids, output_vocab_size, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon,
                                                                                       source_words,
                                                                                       raw_constraint_list,
                                                                                       self.eos_id,
                                                                                       beam_size=self.beam_size)

    pad_dist = np.full((batch_size * self.beam_size, output_vocab_size - 1),
                       fill_value=np.inf, ctx=self.context, dtype=self.dtype)
    eos_dist = np.full((batch_size * self.beam_size, output_vocab_size),
                       fill_value=np.inf, ctx=self.context, dtype=self.dtype)
    eos_dist[:, C.EOS_ID] = 0
    unk_dist = None
    if self.prevent_unk:
        unk_dist = np.zeros_like(eos_dist)
        unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

    # Initialize the beam to track constraint sets, where target-side lexical constraints are present
    constraints = constrained.init_batch(raw_constraint_list, self.beam_size, self.bos_id, self.eos_id)

    if self.global_avoid_trie or any(raw_avoid_list):
        avoid_states = constrained.AvoidBatch(batch_size, self.beam_size,
                                              avoid_list=raw_avoid_list,
                                              global_avoid_trie=self.global_avoid_trie)
        avoid_states.consume(best_word_indices[:, 0])  # constraints operate only on primary target factor

    # (0) encode source sentence, returns a list
    model_states, estimated_reference_lengths = self._inference.encode_and_initialize(source, source_length)
    # repeat states to beam_size
    model_states = _repeat_states(model_states, self.beam_size, self._inference.state_structure())
    # repeat estimated_reference_lengths to shape (batch_size * beam_size, 1)
    estimated_reference_lengths = np.repeat(estimated_reference_lengths, self.beam_size, axis=0)

    # Records items in the beam that are inactive. At the beginning (t==1), there is only one valid or active
    # item on the beam for each sentence
    inactive = np.zeros((batch_size * self.beam_size, 1), dtype='int32', ctx=self.context)

    t = 1
    for t in range(1, max_iterations + 1):  # max_iterations + 1 required to get correct results
        # (1) obtain next predictions and advance models' state
        # target_dists: (batch_size * beam_size, target_vocab_size)
        target_dists, model_states, target_factors = self._inference.decode_step(best_word_indices,
                                                                                 model_states,
                                                                                 vocab_slice_ids)

        # (2) Produces the accumulated cost of target words in each row.
        # There is special treatment for finished and inactive rows: inactive rows are inf everywhere;
        # finished rows are inf everywhere except column zero, which holds the accumulated model score
        scores, lengths = self._update_scores(target_dists, finished, inactive, scores_accumulated,
                                              lengths, max_output_lengths, unk_dist, pad_dist, eos_dist)

        # Mark entries that should be blocked as having a score of np.inf
        if self.global_avoid_trie or any(raw_avoid_list):
            block_indices = avoid_states.avoid()
            if len(block_indices) > 0:
                scores[block_indices] = np.inf
                if self._sample is not None:
                    target_dists[block_indices] = np.inf

        # (3) Get beam_size winning hypotheses for each sentence block separately. Only look as
        # far as the active beam size for each sentence.
        if self._sample is not None:
            best_hyp_indices, best_word_indices, scores_accumulated = self._sample(scores,
                                                                                   target_dists,
                                                                                   finished,
                                                                                   sample_best_hyp_indices)
        else:
            # On the first timestep, all hypotheses have identical histories, so force topk() to choose extensions
            # of the first row only by setting all other rows to inf
            if t == 1:
                scores += first_step_mask

            best_hyp_indices, best_word_indices, scores_accumulated = self._top(scores, offset)

        # Constraints for constrained decoding are processed sentence by sentence
        if any(raw_constraint_list):
            best_hyp_indices, best_word_indices, scores_accumulated, constraints, inactive = constrained.topk(
                t, batch_size, self.beam_size, inactive, scores, constraints, best_hyp_indices,
                best_word_indices, scores_accumulated)

        # Map from restricted to full vocab ids if needed
        if restrict_lexicon:
            best_word_indices = np.take(vocab_slice_ids, best_word_indices, axis=0)

        # (4) Normalize the scores of newly finished hypotheses. Note that after this until the
        # next call to topk(), hypotheses may not be in sorted order.
        _sort_inputs = [best_hyp_indices, best_word_indices, finished, scores_accumulated, lengths,
                        estimated_reference_lengths]
        if target_factors is not None:
            _sort_inputs.append(target_factors)
        best_word_indices, finished, scores_accumulated, lengths, estimated_reference_lengths = \
            self._sort_norm_and_update_finished(*_sort_inputs)

        # Collect best hypotheses, best word indices
        best_word_indices_list.append(best_word_indices)
        best_hyp_indices_list.append(best_hyp_indices)

        if self._should_stop(finished, batch_size):
            break

        # (5) update models' state with winning hypotheses (ascending)
        model_states = self._sort_states(best_hyp_indices, *model_states)

    logger.debug("Finished after %d out of %d steps.", t, max_iterations)

    # (9) Sort the hypotheses within each sentence (normalization for finished hyps may have unsorted them).
    scores_accumulated_shape = scores_accumulated.shape
    folded_accumulated_scores = scores_accumulated.reshape((batch_size, -1))
    indices = np.argsort(folded_accumulated_scores.astype('float32', copy=False), axis=1).reshape((-1,))
    best_hyp_indices = np.unravel_index(indices, scores_accumulated_shape)[0].astype('int32') + offset
    scores_accumulated = scores_accumulated.take(best_hyp_indices, axis=0)
    best_hyp_indices_list.append(best_hyp_indices)
    lengths = lengths.take(best_hyp_indices, axis=0)
    all_best_hyp_indices = np.stack(best_hyp_indices_list, axis=1)
    all_best_word_indices = np.stack(best_word_indices_list, axis=2)
    constraints = [constraints[x] for x in best_hyp_indices.tolist()]

    return all_best_hyp_indices, \
           all_best_word_indices, \
           scores_accumulated, \
           lengths.astype('int32', copy=False), \
           estimated_reference_lengths, \
           constraints
def forward(self,
            source: np.ndarray,
            source_length: np.ndarray,
            restrict_lexicon: Optional[lexicon.TopKLexicon],
            raw_constraint_list: List[Optional[constrained.RawConstraintList]],
            raw_avoid_list: List[Optional[constrained.RawConstraintList]],
            max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                      np.ndarray,
                                                      np.ndarray,
                                                      np.ndarray,
                                                      List[Optional[np.ndarray]],
                                                      List[Optional[constrained.ConstrainedHypothesis]]]:
    """
    Translates a single sentence (batch_size=1) using greedy search.

    :param source: Source ids. Shape: (batch_size=1, bucket_key, num_factors).
    :param source_length: Valid source lengths. Shape: (batch_size=1,).
    :param restrict_lexicon: Lexicon to use for vocabulary restriction.
    :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must appear in each output.
    :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
           that must NOT appear in each output.
    :param max_output_lengths: ndarray of maximum output lengths per input in source.
           Shape: (batch_size=1,). Dtype: int32.
    :return List of best hypotheses indices, list of best word indices,
            array of accumulated length-normalized negative log-probs, hypotheses lengths,
            predicted lengths of references (if any), constraints (if any).
    """
    batch_size = source.shape[0]
    assert batch_size == 1, "Greedy Search does not support batch_size != 1"

    # Maximum search iterations (determined by longest input with eos)
    max_iterations = max_output_lengths.max().item()
    logger.debug("max greedy search iterations: %d", max_iterations)

    # best word_index (also acts as input). Shape: (batch*beam=1, num_target_factors)
    best_word_index = np.full((batch_size, self.num_target_factors),
                              fill_value=self.bos_id, ctx=self.context, dtype='int32')
    outputs = []  # type: List[np.ndarray]

    vocab_slice_ids = None  # type: Optional[np.ndarray]
    # If using a top-k lexicon, select param rows for logit computation that correspond to the
    # target vocab for this sentence.
    if restrict_lexicon:
        source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
        vocab_slice_ids, _, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon, source_words,
                                                                       raw_constraint_list,
                                                                       self.eos_id, beam_size=1)

    # (0) encode source sentence, returns a list
    model_states, _ = self._inference.encode_and_initialize(source, source_length)
    # TODO: check for disabled predicted output length

    t = 1
    for t in range(1, max_iterations + 1):
        scores, model_states, target_factors = self._inference.decode_step(best_word_index,
                                                                           model_states,
                                                                           vocab_slice_ids=vocab_slice_ids)
        # shape: (batch*beam=1, 1)
        best_word_index = self.work_block(scores, vocab_slice_ids, target_factors)
        outputs.append(best_word_index)

        if best_word_index == self.eos_id or best_word_index == C.PAD_ID:
            break

    logger.debug("Finished after %d out of %d steps.", t, max_iterations)

    # shape: (1, num_factors, length)
    stacked_outputs = np.stack(outputs, axis=2)
    length = np.array([t], dtype='int32')  # shape (1,)
    hyp_indices = np.zeros((1, t + 1), dtype='int32')
    score = np.array([-1.])  # TODO: return unnormalized proper score

    return hyp_indices, stacked_outputs, score, length, None, []  # type: ignore
def test_get_training_data_iters():
    pytest.importorskip('mxnet')
    from sockeye import data_io
    from mxnet import np
    from sockeye.test_utils import tmp_digits_dataset

    train_line_count = 100
    train_line_count_empty = 0
    train_max_length = 30
    dev_line_count = 20
    dev_max_length = 30
    expected_mean = 1.0
    expected_std = 0.0
    test_line_count = 20
    test_line_count_empty = 0
    test_max_length = 30
    batch_size = 5
    num_source_factors = num_target_factors = 1
    with tmp_digits_dataset("tmp_corpus",
                            train_line_count, train_line_count_empty, train_max_length - C.SPACE_FOR_XOS,
                            dev_line_count, dev_max_length - C.SPACE_FOR_XOS,
                            test_line_count, test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:
        # tmp common vocab
        vcb = vocab.build_from_paths([data['train_source'], data['train_target']])

        train_iter, val_iter, config_data, data_info = data_io.get_training_data_iters(
            sources=[data['train_source']],
            targets=[data['train_target']],
            validation_sources=[data['dev_source']],
            validation_targets=[data['dev_target']],
            source_vocabs=[vcb],
            target_vocabs=[vcb],
            source_vocab_paths=[None],
            target_vocab_paths=[None],
            shared_vocab=True,
            batch_size=batch_size,
            batch_type=C.BATCH_TYPE_SENTENCE,
            batch_num_devices=1,
            max_seq_len_source=train_max_length,
            max_seq_len_target=train_max_length,
            bucketing=True,
            bucket_width=10)

        assert isinstance(train_iter, data_io.ParallelSampleIter)
        assert isinstance(val_iter, data_io.ParallelSampleIter)
        assert isinstance(config_data, data_io.DataConfig)
        assert data_info.sources == [data['train_source']]
        assert data_info.targets == [data['train_target']]
        assert data_info.source_vocabs == [None]
        assert data_info.target_vocabs == [None]
        assert config_data.data_statistics.max_observed_len_source == train_max_length
        assert config_data.data_statistics.max_observed_len_target == train_max_length
        assert np.isclose(config_data.data_statistics.length_ratio_mean, expected_mean)
        assert np.isclose(config_data.data_statistics.length_ratio_std, expected_std)
        assert train_iter.batch_size == batch_size
        assert val_iter.batch_size == batch_size
        assert train_iter.default_bucket_key == (train_max_length, train_max_length)
        assert val_iter.default_bucket_key == (dev_max_length, dev_max_length)
        assert train_iter.dtype == 'float32'

        # test some batches
        bos_id = vcb[C.BOS_SYMBOL]
        eos_id = vcb[C.EOS_SYMBOL]
        expected_first_target_symbols = np.full((batch_size, 1), bos_id, dtype='float32')
        for epoch in range(2):
            while train_iter.iter_next():
                batch = train_iter.next()
                assert isinstance(batch, data_io.Batch)
                source = batch.source
                target = batch.target
                label = batch.labels[C.TARGET_LABEL_NAME]  # TODO: still 2-shape: (batch, length)
                length_ratio_label = batch.labels[C.LENRATIO_LABEL_NAME]
                assert source.shape[0] == target.shape[0] == label.shape[0] == batch_size
                assert source.shape[2] == target.shape[2] == num_source_factors == num_target_factors
                # each source sequence contains one EOS symbol
                assert np.sum(source == eos_id) == batch_size
                # target first symbol should be BOS
                assert np.array_equal(target[:, 0], expected_first_target_symbols)
                # label first symbol should be 2nd target symbol
                assert np.array_equal(label[:, 0], target[:, 1, 0])
                # each label sequence contains one EOS symbol
                assert np.sum(label == eos_id) == batch_size
            train_iter.reset()