class AssignGTtoDefaultBox(object):
    """Callable transform that re-encodes a target's boxes and labels.

    The ground-truth ``boxes`` and ``labels`` stored in ``target`` are passed
    through ``Encoder.encode`` (built on the default boxes returned by
    ``dboxes300()``) and the encoded results are written back under the
    same two keys.

    NOTE(review): the original author's notes describe the inputs as
    ``[batch, n_objects, 4]`` / ``[batch, n_objects]`` and the outputs as
    ``[batch, 8732, 4]`` / ``[batch, 8732]``; nothing in this block
    confirms those shapes -- verify against ``Encoder.encode``.
    """

    def __init__(self):
        # Default boxes and the encoder that matches ground truth to them.
        self.default_box = dboxes300()
        self.encoder = Encoder(self.default_box)

    def __call__(self, image, target):
        """Encode ``target`` in place and return ``(image, target)``.

        ``image`` is passed through untouched; ``target`` must support item
        access for the keys ``'boxes'`` and ``'labels'``.
        """
        # The right-hand side is evaluated first (read boxes, read labels,
        # encode); the two results are then stored back, boxes before labels.
        target['boxes'], target['labels'] = self.encoder.encode(
            target['boxes'], target['labels'])
        return image, target
('Hruskova', 'Czech'), ('Veverka', 'Czech'), ('Antonopoulos', 'Greek'), ('Leontarakis', 'Greek'), ('Fujishima', 'Japanese'), ('Hayashi', 'Japanese'), ('Park', 'Korean'), ('Seok', 'Korean'), ('Álvarez', 'Spanish'), ('Pérez', 'Spanish'), ] # Encoded dataset: a list of (input, output) pairs encoded as tensors. The # list is populated below. enc_data: List[Tuple[Tensor, Tensor]] = [] ################################################## # Encoding ################################################## # Create the encoder for the input characters char_enc = Encoder(char for name, _ in data for char in name) # Create the encoder for the output languages lang_enc = Encoder(lang for _, lang in data) # Encode the dataset for name, lang in data: enc_inp = torch.tensor([char_enc.encode(char) for char in name]) enc_out = torch.tensor(lang_enc.encode(lang)) enc_data.append((enc_inp, enc_out))
def test_box_encoder():
    """Compare the CUDA box encoder with the reference ``Encoder`` and time both.

    Builds a batch of 128 copies of the module-level sample ``b1``, draws
    random integer labels, runs ``C.box_encoder`` on the GPU and
    ``Encoder.encode`` on the CPU (one sample at a time), then prints every
    mismatching box row / label together with the failure counts and the
    wall-clock time of each implementation.

    The function only prints its findings; it does not raise on mismatch.
    """
    # `torch.cuda.device(0)` merely constructs a context manager and has no
    # effect unless used in a `with` statement; `set_device` actually selects
    # the device.
    torch.cuda.set_device(0)

    # Source boxes: the same sample repeated 128 times.
    box_list = [b1] * 128
    N, bboxes_cat, offsets, bboxes = load_bboxes(box_list, True)
    print(N, bboxes_cat, offsets)

    # One random integer label per source box (offsets[-1] is the total count).
    label_numpy = np.random.randn(offsets[-1]) * 10
    labels = torch.tensor(label_numpy.astype(np.int64)).cuda()

    # Target boxes are the default boxes in 'ltrb' order. They are moved to
    # the GPU up front so the host-to-device copy is not part of the timing.
    dboxes = dboxes300_coco()
    dboxes_ltrb = torch.tensor(
        np.array(dboxes(order='ltrb')).astype(np.float32)).cuda()

    # Drain pending GPU work so the timer covers the encoder call only.
    torch.cuda.synchronize()
    start = time.time()
    bbox_out, label_out = C.box_encoder(
        N, bboxes_cat, offsets, labels, dboxes_ltrb, 0.5)
    torch.cuda.synchronize()
    cuda_time = time.time() - start

    # Reference implementation: encode each sample separately on the CPU.
    encoder = Encoder(dboxes300_coco())
    labels_ref = torch.tensor(label_numpy.astype(np.int64))

    start = time.time()
    ref_boxes = []
    ref_labels = []
    for i, bbox in enumerate(bboxes):
        label_slice = labels_ref[offsets[i]:offsets[i + 1]]
        bbox_ref_out, label_ref_out = encoder.encode(
            bbox.cpu(), label_slice.cpu(), criteria=0.5)
        ref_boxes.append(bbox_ref_out)
        ref_labels.append(label_ref_out)
    ref_time = time.time() - start

    ref_boxes = torch.cat(ref_boxes)
    ref_labels = torch.cat(ref_labels)

    # A box row fails when ANY of its coordinates differs from the reference
    # (the previous `not res.any()` check only flagged rows in which every
    # coordinate was wrong).
    boxes_close = np.isclose(ref_boxes.numpy(), bbox_out.cpu().numpy())
    num_fail = 0
    for i, res in enumerate(boxes_close):
        if not res.all():
            num_fail += 1
            print(i, res, ref_boxes[i, :], bbox_out[i, :])
    print('{} bboxes failed'.format(num_fail))

    # `.cpu()` already waits for the GPU, so no extra synchronize is needed.
    label_out = label_out.cpu().numpy()
    ref_labels_np = ref_labels.cpu().numpy()
    labels_close = np.isclose(label_out, ref_labels_np)
    num_fail = 0
    for i, res in enumerate(labels_close):
        if not res:
            num_fail += 1
            print('label: ', i, res, label_out[i], ref_labels_np[i])
    print('{} labels failed'.format(num_fail))

    print('CUDA took {}, numpy took: {}'.format(cuda_time, ref_time))
def encode_data(data_set: List[Tuple[Name, Lang]]) \
        -> List[Tuple[Tensor, Tensor]]:
    """Turn a list of (name, language) pairs into a list of tensor pairs.

    Two encoders are built from `data_set` -- one over all characters of all
    names, one over all languages.  Every pair is then mapped to
    ``(name_tensor, lang_tensor)``, where ``name_tensor`` holds one encoded
    value per character of the name and ``lang_tensor`` holds the encoded
    language.  The output has the same length and order as the input.

    `data_set` is traversed several times, so it must be re-iterable (e.g. a
    list).  See `sample_data_set` for an example of a non-encoded dataset.

    Examples:

    >>> data = [('Bach', 'German'), ('Mann', 'German'), ('Miles', 'English')]
    >>> enc_data = encode_data(data)

    The `enc_data` object should at this point look like this:

        [ (tensor([0, 1, 2, 3]), tensor(0))
        , (tensor([4, 1, 5, 5]), tensor(0))
        , (tensor([4, 6, 7, 8, 9]), tensor(1))
        ]

    although the exact values inside may differ (e.g. you may use
    index `0` to represent character `a`)

    >>> assert len(enc_data) == len(data)
    >>> for (inp, out), (x, y) in zip(data, enc_data):
    ...     assert len(inp) == len(x)
    ...     assert isinstance(x, Tensor)
    ...     assert isinstance(y, Tensor)

    # There are 10 distinct characters in the dataset
    >>> set(ix.item() for x, y in enc_data for ix in x)
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

    # And 2 distinct languages
    >>> set(y.item() for x, y in enc_data)
    {0, 1}

    # Finally, some tests with the sample data set
    >>> enc_data = encode_data(sample_data_set)
    >>> set(y.item() for x, y in enc_data)
    {0, 1, 2, 3, 4, 5, 6}
    >>> set(ix.item() for x, y in enc_data for ix in x) # doctest:+ELLIPSIS
    {0, 1, ..., 36, 37}

    # The number of occurrences of the most frequent character
    >>> from collections import Counter
    >>> cnt = Counter(char for name, lang in sample_data_set for char in name)
    >>> cnt.most_common(1)[0]
    ('e', 16)
    >>> cnt = Counter(ix.item() for x, y in enc_data for ix in x)
    >>> cnt.most_common(1)[0][1]
    16
    """
    # One encoder per side: input characters first, then output languages.
    char_enc = Encoder(ch for word, _ in data_set for ch in word)
    lang_enc = Encoder(label for _, label in data_set)

    # Encode every pair; within a pair the name is encoded before the language.
    return [
        (
            torch.tensor([char_enc.encode(ch) for ch in word]),
            torch.tensor(lang_enc.encode(label)),
        )
        for word, label in data_set
    ]
data.append(extract(token_list))


##################################################
# Pre-processing
##################################################


def preprocess(inp: Inp) -> Inp:
    """Return a new list holding the lower-cased form of every word in `inp`."""
    return [word.lower() for word in inp]


# Replace every dataset entry, in place, by its pre-processed version; only
# the input side changes, the output side is carried over untouched.
for i, (inp, out) in enumerate(data):
    data[i] = preprocess(inp), out


##################################################
# Encoding
##################################################

# Create the encoder for the input words
word_enc = Encoder(word for inp, _ in data for word in inp)
# Create the encoder for the POS tags
tag_enc = Encoder(pos for _, out in data for pos in out)

# Encode each sentence: one encoded value per word on the input side and one
# per POS tag on the output side.
for inp, out in data:
    enc_inp = torch.tensor(list(map(word_enc.encode, inp)))
    enc_out = torch.tensor(list(map(tag_enc.encode, out)))
    enc_data.append((enc_inp, enc_out))