def test_restoration_integrity(self):
    """Check that decoding an encoded character gives back that character."""
    table = CharacterTable()
    source_char = 'c'
    code = table.encode(source_char)
    self.assertEqual(table.decode(code), source_char)
def test_decode_without_blanks_and_repeatitions(self):
    """Decode labels that contain one code per character and no blank labels.

    The labels are the encoded characters of ``self.original_text`` fed
    straight into the decoder; the expected result is 'Helo, world!'.
    """
    table = CharacterTable()
    ctc_decoder = CTCOutputDecoder(table)
    # One label per character, with no blank label separating neighbours.
    raw_labels = list(map(table.encode, self.original_text))
    decoded_text = ctc_decoder.decode(raw_labels)
    self.assertEqual(decoded_text, 'Helo, world!')
def setUp(self):
    """Create the fixture sequences, the character table and the adapter.

    Stores two input sequences and two output sequences on the instance,
    together with the encoded start and sentinel characters and a
    ``Seq2seqAdapter`` built from them.
    """
    self.seqs_in = [[[1, 1], [2, 2], [3, 3]], [[4, 4]]]
    self.seqs_out = [[34, 85, 23], [28]]

    table = CharacterTable()
    self.char_table = table

    start_code = table.encode(table.start)
    self.start = start_code
    sentinel_code = table.encode(table.sentinel)
    self.sentinel = sentinel_code

    # The number of classes equals the size of the character table.
    self.adapter = Seq2seqAdapter(
        start_code, sentinel_code, num_classes=len(table))
def test_decode_one_label_sequence(self):
    """Check that a single-label sequence decodes to its one character."""
    table = CharacterTable()
    ctc_decoder = CTCOutputDecoder(table)
    expected = 'c'
    single_label = table.encode(expected)
    self.assertEqual(ctc_decoder.decode([single_label]), expected)
def text_to_codes(self, text):
    """Encode ``text`` and surround every code with a blank label.

    The blank label is ``len(CharacterTable())``. The returned list starts
    with a blank, and each character code is followed by another blank.
    """
    table = CharacterTable()
    # Encode every character before reading the table size, as the
    # original implementation did.
    encoded = list(map(table.encode, text))
    blank = len(table)
    return [blank] + [item for code in encoded for item in (code, blank)]
def dummy_source():
    """Build a ``PreLoadedSource`` holding a single hand-made example.

    The input text is encoded character by character, passed through
    ``to_categorical`` and reshaped to ``(1, len(text), -1)``; the paired
    target is the list ``['Hello, world']``.
    """
    noisy_text = 'HHHH eee lll lll ooo ,,, www oooo rrr lll ddd'
    target_text = 'Hello, world'

    table = CharacterTable()
    codes = list(map(table.encode, noisy_text))
    # NOTE(review): to_categorical is presumably the Keras one-hot helper
    # returning an array -- confirm against the file's imports.
    one_hot = to_categorical(codes, num_classes=len(table))
    batch = one_hot.reshape(1, len(noisy_text), -1)
    return PreLoadedSource(batch, [target_text])
def test_decode_labels(self):
    """Decode a label sequence with repeated labels and blank runs.

    Each character of ``self.original_text`` is repeated four times and
    followed by four blanks, with four leading blanks at the start; the
    decoder is expected to return ``self.original_text`` unchanged.
    """
    table = CharacterTable()
    ctc_decoder = CTCOutputDecoder(table)
    blank = len(table)
    run_length = 4

    blank_run = [blank] * run_length
    labels = list(blank_run)
    for ch in self.original_text:
        labels += [table.encode(ch)] * run_length
        labels += blank_run

    self.assertEqual(ctc_decoder.decode(labels), self.original_text)
def test_mapping_is_one_to_one(self):
    """Check that codes and characters map to each other without collisions.

    First decodes every code in ``range(len(char_table))`` and asserts the
    resulting characters are all distinct; then encodes those characters
    back and asserts the resulting codes are all distinct.
    """
    char_table = CharacterTable()

    decoded_chars = [char_table.decode(code)
                     for code in range(len(char_table))]
    self.assertEqual(
        len(decoded_chars), len(set(decoded_chars)),
        'Got duplicate characters from different codes: {}'.format(
            decoded_chars))

    encoded_chars = [char_table.encode(ch) for ch in decoded_chars]
    # The message previously had no '{}' placeholder, so the offending
    # codes were silently dropped from the failure output.
    self.assertEqual(
        len(encoded_chars), len(set(encoded_chars)),
        '2 or more characters got mapped to the same code: {}'.format(
            encoded_chars))
def test_sentinel(self):
    """Check that the sentinel character survives an encode/decode round trip."""
    table = CharacterTable()
    expected = table.sentinel
    sentinel_code = table.encode(expected)
    self.assertEqual(table.decode(sentinel_code), expected)