def test_acceptor_wo_arcs_from_openfst(self):
    """Parse OpenFst acceptor text that has no arc lines.

    ``s1`` is blank; ``s2`` and ``s3`` contain only "state weight" lines.
    The expected strings show that each state with a finite weight yields
    one arc with label -1 into a newly added last state, carrying the
    negated weight, while the ``Inf`` state yields no arc.
    """
    s1 = ''' '''

    s2 = '''
        0 Inf
        1 0.1
    '''

    s3 = '''
        0 Inf
        1 0.1
        2 0.2
    '''

    # NOTE(review): `device` is not used in the loop body, so the same CPU
    # checks simply run once per entry of self.devices -- confirm whether
    # the FSAs were meant to be moved to `device`.
    for device in self.devices:
        fsa1 = k2.Fsa.from_openfst(s1)
        self.assertEqual('', k2.to_str_simple(fsa1))

        fsa2 = k2.Fsa.from_openfst(s2)
        self.assertEqual(_remove_leading_spaces(k2.to_str_simple(fsa2)),
                         "1 2 -1 -0.1\n2")
        # Drop the last column (the score bits) and compare
        # (src, dst, label) only.
        arcs2 = fsa2.arcs.values()[:, :-1]
        assert torch.all(
            torch.eq(arcs2, torch.tensor([[1, 2, -1]], dtype=torch.int32)))

        fsa3 = k2.Fsa.from_openfst(s3)
        self.assertEqual(fsa3.arcs.dim0(), 4)
        self.assertEqual(_remove_leading_spaces(k2.to_str_simple(fsa3)),
                         "1 3 -1 -0.1\n2 3 -1 -0.2\n3")
def test_simplified(self):
    """Check ``k2.ctc_graph`` on two label sequences.

    The graphs are built twice per device, once from a ragged tensor and
    once from a nested Python list, and both results must print as the
    same expected FSAs.
    """
    # Expected textual form of the graph for [1, 2, 2].
    want_first = '\n'.join([
        '0 0 0 0 0', '0 1 1 1 0', '1 2 0 0 0', '1 1 1 0 0', '1 3 2 2 0',
        '2 2 0 0 0', '2 3 2 2 0', '3 4 0 0 0', '3 3 2 0 0', '3 5 2 2 0',
        '4 4 0 0 0', '4 5 2 2 0', '5 6 0 0 0', '5 5 2 0 0', '5 7 -1 0 0',
        '6 6 0 0 0', '6 7 -1 0 0', '7'
    ])
    # Expected textual form of the graph for [1, 2, 3].
    want_second = '\n'.join([
        '0 0 0 0 0', '0 1 1 1 0', '1 2 0 0 0', '1 1 1 0 0', '1 3 2 2 0',
        '2 2 0 0 0', '2 3 2 2 0', '3 4 0 0 0', '3 3 2 0 0', '3 5 3 3 0',
        '4 4 0 0 0', '4 5 3 3 0', '5 6 0 0 0', '5 5 3 0 0', '5 7 -1 0 0',
        '6 6 0 0 0', '6 7 -1 0 0', '7'
    ])
    wanted = (want_first, want_second)

    for device in self.devices:
        s = ''' [ [1 2 2] [1 2 3] ] '''
        ragged_int = k2.RaggedInt(s).to(device)
        graphs_from_ragged = k2.ctc_graph(ragged_int, True)
        graphs_from_list = k2.ctc_graph([[1, 2, 2], [1, 2, 3]], True, device)

        # Printing happens on CPU; list-based graphs are checked first,
        # then the ragged-based ones.
        for graphs in (graphs_from_list, graphs_from_ragged):
            for idx, want in enumerate(wanted):
                got = k2.to_str_simple(graphs[idx].to('cpu'))
                assert got.strip() == want
def test_acceptor_from_openfst(self):
    """Parse an OpenFst-format acceptor and check arcs, scores and printing.

    The same text is parsed four ways (defaults, ``acceptor=True``,
    ``num_aux_labels=0`` and ``aux_label_names=[]``); every variant must
    give the same result. The expected tensors show that ``fsa.scores``
    holds the negation of the weights written in the text, and that two
    extra arcs with label -1 lead into an added last state 8.
    """
    s = '''
        0 1 2 -1.2
        0 2 10 -2.2
        1 6 1 -3.2
        1 3 3 -4.2
        2 6 2 -5.2
        2 4 2 -6.2
        3 6 3 -7.2
        5 0 1 -8.2
        7
        6 -9.2
    '''
    expected_str = '''
        0 1 2 -1.2
        0 2 10 -2.2
        1 6 1 -3.2
        1 3 3 -4.2
        2 6 2 -5.2
        2 4 2 -6.2
        3 6 3 -7.2
        5 0 1 -8.2
        6 8 -1 -9.2
        7 8 -1 0
        8
    '''
    # Four equivalent ways of requesting an acceptor.
    parse_options = (
        {},
        {'acceptor': True},
        {'num_aux_labels': 0},
        {'aux_label_names': []},
    )
    for options in parse_options:
        fsa = k2.Fsa.from_openfst(s, **options)

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa, openfst=True))

        # All columns except the last one (the score bits).
        arcs = fsa.arcs.values()[:, :-1]
        assert isinstance(arcs, torch.Tensor)
        assert arcs.dtype == torch.int32
        assert arcs.device.type == 'cpu'
        assert arcs.shape == (10, 3), 'there should be 10 arcs'
        assert torch.all(
            torch.eq(arcs[0], torch.tensor([0, 1, 2], dtype=torch.int32)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 0],
                         dtype=torch.float32))

        # In-place modification of the scores must be visible afterwards.
        fsa.scores *= -1

        assert torch.allclose(
            fsa.scores,
            torch.tensor(
                [-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2, -9.2, 0],
                dtype=torch.float32))
def test(self):
    """Check ``k2.levenshtein_graph`` for several insertion/deletion scores.

    For every device and score, graphs are built from a ragged tensor and
    from a nested list; both must print as the expected FSAs and carry the
    expected per-arc score offsets.
    """
    for device in self.devices:
        for score in [-0.5, -0.501, -0.502]:
            s = ''' [ [1 2 3] [ ] [4 5 6] ] '''
            ragged_int = k2.RaggedTensor(s).to(device)
            graphs_from_ragged = k2.levenshtein_graph(ragged_int,
                                                      ins_del_score=score)
            graphs_from_list = k2.levenshtein_graph(
                [[1, 2, 3], [], [4, 5, 6]],
                device=device,
                ins_del_score=score)

            want_a = '\n'.join([
                f'0 0 0 0 {score}', '0 1 0 1 -0.5', '0 1 1 1 0',
                f'1 1 0 0 {score}', '1 2 0 2 -0.5', '1 2 2 2 0',
                f'2 2 0 0 {score}', '2 3 0 3 -0.5', '2 3 3 3 0',
                f'3 3 0 0 {score}', '3 4 -1 -1 0', '4'
            ])
            want_b = '\n'.join(
                [f'0 0 0 0 {score}', '0 1 -1 -1 0', '1'])
            want_c = '\n'.join([
                f'0 0 0 0 {score}', '0 1 0 4 -0.5', '0 1 4 4 0',
                f'1 1 0 0 {score}', '1 2 0 5 -0.5', '1 2 5 5 0',
                f'2 2 0 0 {score}', '2 3 0 6 -0.5', '2 3 6 6 0',
                f'3 3 0 0 {score}', '3 4 -1 -1 0', '4'
            ])
            wanted = (want_a, want_b, want_c)

            # Render everything on CPU first, then compare: list-based
            # graphs first, ragged-based graphs second.
            got_ragged = [
                k2.to_str_simple(graphs_from_ragged[idx].to('cpu'))
                for idx in range(3)
            ]
            got_list = [
                k2.to_str_simple(graphs_from_list[idx].to('cpu'))
                for idx in range(3)
            ]
            for got, want in zip(got_list, wanted):
                assert got.strip() == want
            for got, want in zip(got_ragged, wanted):
                assert got.strip() == want

            # Difference between the requested score and -0.5; it appears
            # on exactly the arcs printed with `{score}` above.
            delta = score - (-0.5)
            expected_offset = torch.tensor([
                delta, 0, 0, delta, 0, 0, delta, 0, 0, delta, 0,
                delta, 0,
                delta, 0, 0, delta, 0, 0, delta, 0, 0, delta, 0
            ], dtype=torch.float32)

            # getattr() with a string is required here: written as a plain
            # attribute access inside a class body, a name starting with
            # two underscores would be name-mangled by Python.
            offset_ragged = getattr(
                graphs_from_ragged, "__ins_del_score_offset_internal_attr_")
            offset_ragged = offset_ragged.to('cpu')
            offset = getattr(
                graphs_from_list,
                "__ins_del_score_offset_internal_attr_").to('cpu')
            assert torch.allclose(expected_offset, offset_ragged)
            assert torch.allclose(expected_offset, offset)
def test_treat_epsilon_specially_true(self):
    """Intersect two arc-sorted FSAs with the default epsilon handling.

    Checks the printed result, the gradients propagated back to both
    inputs, and that an FsaVec input yields an FsaVec output.
    """
    # this version works only on CPU and requires
    # arc-sorted inputs
    # a_fsa recognizes `(1|3)?2*`
    s1 = '''
        0 1 3 0.0
        0 1 1 0.2
        0 1 0 0.1
        1 1 2 0.3
        1 2 -1 0.4
        2
    '''
    a_fsa = k2.Fsa.from_str(s1)
    a_fsa.requires_grad_(True)

    # b_fsa recognizes `1|2|5`
    s2 = '''
        0 1 5 0
        0 1 1 1
        0 1 2 2
        1 2 -1 3
        2
    '''
    b_fsa = k2.Fsa.from_str(s2)
    b_fsa.requires_grad_(True)

    # fsa recognizes 1|2
    fsa = k2.intersect(k2.arc_sort(a_fsa), k2.arc_sort(b_fsa))
    assert len(fsa.shape) == 2
    actual_str = k2.to_str_simple(fsa)
    expected_str = '\n'.join(
        ['0 1 0 0.1', '0 2 1 1.2', '1 2 2 2.3', '2 3 -1 3.4', '3'])
    assert actual_str.strip() == expected_str

    loss = fsa.scores.sum()
    (-loss).backward()
    # arc 1, 2, 3, and 4 of a_fsa are kept in the final intersected FSA
    assert torch.allclose(a_fsa.grad,
                          torch.tensor([0, -1, -1, -1, -1]).to(a_fsa.grad))

    # arc 1, 2, and 3 of b_fsa are kept in the final intersected FSA
    assert torch.allclose(b_fsa.grad,
                          torch.tensor([0, -1, -1, -1]).to(b_fsa.grad))

    # if any of the input FSA is an FsaVec,
    # the output FSA is also an FsaVec.
    a_fsa.scores.grad = None
    b_fsa.scores.grad = None
    a_fsa = k2.create_fsa_vec([a_fsa])
    fsa = k2.intersect(k2.arc_sort(a_fsa), k2.arc_sort(b_fsa))
    assert len(fsa.shape) == 3
def test_treat_epsilon_specially_false(self):
    """Intersect two FSAs with ``treat_epsilons_specially=False``.

    Runs on CPU and, when both torch and k2 report CUDA support, on CUDA.
    Checks the printed result, the gradients propagated back to both
    inputs, and that an FsaVec input yields an FsaVec output.
    """
    devices = [torch.device('cpu')]
    if torch.cuda.is_available() and k2.with_cuda:
        devices.append(torch.device('cuda'))

    for device in devices:
        # a_fsa recognizes `(0|1)2*`
        s1 = '''
            0 1 0 0.1
            0 1 1 0.2
            1 1 2 0.3
            1 2 -1 0.4
            2
        '''
        a_fsa = k2.Fsa.from_str(s1).to(device)
        a_fsa.requires_grad_(True)

        # b_fsa recognizes `1|2`
        s2 = '''
            0 1 1 1
            0 1 2 2
            1 2 -1 3
            2
        '''
        b_fsa = k2.Fsa.from_str(s2).to(device)
        b_fsa.requires_grad_(True)

        # fsa recognizes `1`
        fsa = k2.intersect(a_fsa, b_fsa, treat_epsilons_specially=False)
        assert len(fsa.shape) == 2
        actual_str = k2.to_str_simple(fsa)
        expected_str = '\n'.join(['0 1 1 1.2', '1 2 -1 3.4', '2'])
        assert actual_str.strip() == expected_str

        loss = fsa.scores.sum()
        (-loss).backward()
        # arc 1 and 3 of a_fsa are kept in the final intersected FSA
        assert torch.allclose(a_fsa.grad,
                              torch.tensor([0, -1, 0, -1]).to(a_fsa.grad))

        # arc 0 and 2 of b_fsa are kept in the final intersected FSA
        assert torch.allclose(b_fsa.grad,
                              torch.tensor([-1, 0, -1]).to(b_fsa.grad))

        # if any of the input FSA is an FsaVec,
        # the output FSA is also an FsaVec.
        a_fsa.scores.grad = None
        b_fsa.scores.grad = None
        a_fsa = k2.create_fsa_vec([a_fsa])
        fsa = k2.intersect(a_fsa, b_fsa, treat_epsilons_specially=False)
        assert len(fsa.shape) == 3
def test_transducer2_from_str(self):
    """Parse a transducer with two auxiliary label columns.

    The text is parsed once with ``num_aux_labels=2`` and once with
    explicit ``aux_label_names``; both must expose ``aux_labels`` and
    ``aux_labels2`` with the values from the text.
    """
    s = '''
        0 1 2 22 101 -1.2
        0 2 10 100 102 -2.2
        1 6 -1 16 103 -4.2
        1 3 3 33 104 -3.2
        2 6 -1 26 105 -5.2
        2 4 2 22 106 -6.2
        3 6 -1 36 107 -7.2
        5 0 1 50 108 -8.2
        6
    '''
    # only aux_labels will be printed right now..
    expected_str = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 -1 16 -4.2
        1 3 3 33 -3.2
        2 6 -1 26 -5.2
        2 4 2 22 -6.2
        3 6 -1 36 -7.2
        5 0 1 50 -8.2
        6
    '''
    # Two equivalent ways of requesting two aux-label columns.
    parse_options = (
        {'num_aux_labels': 2},
        {'aux_label_names': ['aux_labels', 'aux_labels2']},
    )
    for options in parse_options:
        fsa = k2.Fsa.from_str(s, **options)

        assert fsa.aux_labels.dtype == torch.int32
        assert fsa.aux_labels.device.type == 'cpu'
        assert torch.all(
            torch.eq(
                fsa.aux_labels,
                torch.tensor([22, 100, 16, 33, 26, 22, 36, 50],
                             dtype=torch.int32)))
        assert torch.all(
            torch.eq(
                fsa.aux_labels2,
                torch.tensor([101, 102, 103, 104, 105, 106, 107, 108],
                             dtype=torch.int32)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([-1.2, -2.2, -4.2, -3.2, -5.2, -6.2, -7.2, -8.2],
                         dtype=torch.float32))

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa))
def test_transducer_from_openfst(self):
    """Parse an OpenFst-format transducer with one aux-label column.

    Three equivalent parse options are exercised. The expected values show
    that scores are the negated weights from the text and that two arcs
    with label and aux label -1 lead into an added last state 8.
    """
    s = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 1 16 -4.2
        1 3 3 33 -3.2
        2 6 2 26 -5.2
        2 4 2 22 -6.2
        3 6 3 36 -7.2
        5 0 1 50 -8.2
        7 -9.2
        6
    '''
    expected_str = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 1 16 -4.2
        1 3 3 33 -3.2
        2 6 2 26 -5.2
        2 4 2 22 -6.2
        3 6 3 36 -7.2
        5 0 1 50 -8.2
        6 8 -1 -1 0
        7 8 -1 -1 -9.2
        8
    '''
    # Three equivalent ways of requesting a single aux-label column.
    parse_options = (
        {'acceptor': False},
        {'num_aux_labels': 1},
        {'aux_label_names': ['aux_labels']},
    )
    for options in parse_options:
        fsa = k2.Fsa.from_openfst(s, **options)

        assert fsa.aux_labels.dtype == torch.int32
        assert fsa.aux_labels.device.type == 'cpu'
        assert torch.all(
            torch.eq(
                fsa.aux_labels,
                torch.tensor([22, 100, 16, 33, 26, 22, 36, 50, -1, -1],
                             dtype=torch.int32)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([1.2, 2.2, 4.2, 3.2, 5.2, 6.2, 7.2, 8.2, 0, 9.2],
                         dtype=torch.float32))

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa, openfst=True))
def test_acceptor_from_tensor(self):
    """Build an acceptor directly from an int32 arc tensor.

    Each row is (src, dst, label, score bits), where the score is encoded
    with ``_k2.float_as_int``. Checks the printed form, the arc columns
    and that ``fsa.scores`` can be modified in place.
    """
    raw_scores = [-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2]
    src_dst_label = [
        (0, 1, 2),
        (0, 2, 10),
        (1, 6, -1),
        (1, 3, 3),
        (2, 6, -1),
        (2, 4, 2),
        (3, 6, -1),
        (5, 0, 1),
    ]
    rows = [[src, dst, label, _k2.float_as_int(score)]
            for (src, dst, label), score in zip(src_dst_label, raw_scores)]
    fsa_tensor = torch.tensor(rows, dtype=torch.int32)

    fsa = k2.Fsa(fsa_tensor)

    expected_str = '''
        0 1 2 -1.2
        0 2 10 -2.2
        1 6 -1 -3.2
        1 3 3 -4.2
        2 6 -1 -5.2
        2 4 2 -6.2
        3 6 -1 -7.2
        5 0 1 -8.2
        6
    '''
    assert _remove_leading_spaces(expected_str) == \
        _remove_leading_spaces(k2.to_str_simple(fsa))

    # All columns except the last one (the score bits).
    arcs = fsa.arcs.values()[:, :-1]
    assert isinstance(arcs, torch.Tensor)
    assert arcs.dtype == torch.int32
    assert arcs.device.type == 'cpu'
    assert arcs.shape == (8, 3), 'there should be 8 arcs'
    assert torch.all(
        torch.eq(arcs[0], torch.tensor([0, 1, 2], dtype=torch.int32)))

    assert torch.allclose(
        fsa.scores,
        torch.tensor([-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2],
                     dtype=torch.float32))

    # In-place modification of the scores must be visible afterwards.
    fsa.scores *= -1

    assert torch.allclose(
        fsa.scores,
        torch.tensor([1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2],
                     dtype=torch.float32))
def test_transducer_from_str(self):
    """Parse a transducer with one aux-label column from k2 text format.

    Three equivalent parse options are exercised; each must yield the same
    aux labels, scores and printed form.
    """
    s = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 -1 16 -4.2
        1 3 3 33 -3.2
        2 6 -1 26 -5.2
        2 4 2 22 -6.2
        3 6 -1 36 -7.2
        5 0 1 50 -8.2
        6
    '''
    expected_str = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 -1 16 -4.2
        1 3 3 33 -3.2
        2 6 -1 26 -5.2
        2 4 2 22 -6.2
        3 6 -1 36 -7.2
        5 0 1 50 -8.2
        6
    '''
    # Three equivalent ways of requesting a single aux-label column.
    parse_options = (
        {'num_aux_labels': 1},
        {'acceptor': False},
        {'aux_label_names': ['aux_labels']},
    )
    for options in parse_options:
        fsa = k2.Fsa.from_str(s, **options)

        assert fsa.aux_labels.dtype == torch.int32
        assert fsa.aux_labels.device.type == 'cpu'
        assert torch.all(
            torch.eq(
                fsa.aux_labels,
                torch.tensor([22, 100, 16, 33, 26, 22, 36, 50],
                             dtype=torch.int32)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([-1.2, -2.2, -4.2, -3.2, -5.2, -6.2, -7.2, -8.2],
                         dtype=torch.float32))

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa))
def test(self):
    """Check ``k2.connect`` and the gradient it propagates to the input.

    The expected output keeps only input arcs 0 and 2 (with states
    renumbered), so only those two arcs receive a gradient of 1.
    """
    s = '''
        0 1 1 0.1
        0 2 2 0.2
        1 4 -1 0.3
        3 4 -1 0.4
        4
    '''
    fsa = k2.Fsa.from_str(s)
    fsa.requires_grad_(True)

    connected_fsa = k2.connect(fsa)

    expected_str = '\n'.join(['0 1 1 0.1', '1 2 -1 0.3', '2'])
    actual_str = k2.to_str_simple(connected_fsa)
    assert actual_str.strip() == expected_str

    loss = connected_fsa.scores.sum()
    loss.backward()
    assert torch.allclose(fsa.scores.grad,
                          torch.tensor([1, 0, 1, 0], dtype=torch.float32))
def test(self):
    """Check ``k2.arc_sort`` ordering and gradient routing.

    After sorting, the first two arcs swap places. The loss averages
    sorted arcs 1 and 2, which are input arcs 0 and 2, so those receive a
    gradient of 0.5 each.
    """
    s = '''
        0 1 2 0.1
        0 1 1 0.2
        1 2 -1 0.3
        2
    '''
    fsa = k2.Fsa.from_str(s)
    fsa.requires_grad_(True)

    sorted_fsa = k2.arc_sort(fsa)

    expected_str = '\n'.join(['0 1 1 0.2', '0 1 2 0.1', '1 2 -1 0.3', '2'])
    actual_str = k2.to_str_simple(sorted_fsa)
    assert actual_str.strip() == expected_str

    loss = (sorted_fsa.scores[1] + sorted_fsa.scores[2]) / 2
    loss.backward()
    assert torch.allclose(fsa.scores.grad,
                          torch.tensor([0.5, 0, 0.5], dtype=torch.float32))
def test_transducer_from_tensor(self):
    """Build a transducer from an arc tensor plus an aux-label tensor.

    Runs once per entry of ``self.devices``. Each arc row is
    (src, dst, label, score bits), where the score is encoded with
    ``_k2.float_as_int``.
    """
    expected_str = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 -1 16 -4.2
        1 3 3 33 -3.2
        2 6 -1 26 -5.2
        2 4 2 22 -6.2
        3 6 -1 36 -7.2
        5 0 1 50 -8.2
        6
    '''
    for device in self.devices:
        fsa_tensor = torch.tensor(
            [[0, 1, 2, _k2.float_as_int(-1.2)],
             [0, 2, 10, _k2.float_as_int(-2.2)],
             [1, 6, -1, _k2.float_as_int(-4.2)],
             [1, 3, 3, _k2.float_as_int(-3.2)],
             [2, 6, -1, _k2.float_as_int(-5.2)],
             [2, 4, 2, _k2.float_as_int(-6.2)],
             [3, 6, -1, _k2.float_as_int(-7.2)],
             [5, 0, 1, _k2.float_as_int(-8.2)]],
            dtype=torch.int32).to(device)
        aux_labels_tensor = torch.tensor([22, 100, 16, 33, 26, 22, 36, 50],
                                         dtype=torch.int32).to(device)

        fsa = k2.Fsa(fsa_tensor, aux_labels_tensor)

        # The aux labels must stay on the device they were created on.
        assert fsa.aux_labels.dtype == torch.int32
        assert fsa.aux_labels.device.type == device.type
        assert torch.all(
            torch.eq(
                fsa.aux_labels,
                torch.tensor([22, 100, 16, 33, 26, 22, 36, 50],
                             dtype=torch.int32).to(device)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([-1.2, -2.2, -4.2, -3.2, -5.2, -6.2, -7.2, -8.2],
                         dtype=torch.float32,
                         device=device))

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa))
def test_acceptor_wo_arcs_from_str(self):
    """Parse k2-format acceptor text that contains no arc lines.

    Blank text must print as an empty string, the two-state-line text
    ``s2`` must be rejected with ``ValueError``, and the text holding only
    the state ``1`` must produce ``arcs.dim0() == 2``.
    """
    s1 = ''' '''

    # NOTE(review): in the flattened source this literal read "0 1"; it is
    # laid out here as two bare state lines -- confirm against the
    # original file.
    s2 = '''
        0
        1
    '''

    s3 = '''
        1
    '''

    # NOTE(review): `device` is not used in the loop body, so the same CPU
    # checks simply run once per entry of self.devices.
    for device in self.devices:
        fsa1 = k2.Fsa.from_str(s1)
        self.assertEqual(k2.to_str_simple(fsa1), '')

        with self.assertRaises(ValueError):
            _ = k2.Fsa.from_str(s2)

        fsa3 = k2.Fsa.from_str(s3)
        self.assertEqual(fsa3.arcs.dim0(), 2)
def _construct_f(fsa_vec: k2.Fsa) -> k2.Fsa:
    """Build a looping transducer from the union of ``fsa_vec``.

    The union is given aux labels (1..num_fsa on its first ``num_fsa``
    arcs, 0 elsewhere), printed as text, rewritten and re-parsed:

    * a new first arc ``0 -> last_state`` with label -1 is prepended;
    * on every 5-token (arc) line, a destination equal to the last state
      is redirected to state 0 and a label of -1 is replaced by 0.

    The re-parsed FSA is inverted with ``k2.invert`` before being returned.

    Args:
      fsa_vec: the FSAs to combine; ``fsa_vec.shape[0]`` is used as their
        count.

    Returns:
      The inverted, rewritten FSA.
    """
    fsa_count = fsa_vec.shape[0]
    merged = k2.union(fsa_vec)

    # NOTE(review): torch.zeros defaults to float32, so the int32 values
    # assigned below are stored as floats -- confirm this is intended.
    merged.aux_labels = torch.zeros(merged.num_arcs)
    merged.aux_labels[0:fsa_count] = torch.tensor(
        list(range(1, 1 + fsa_count)), dtype=torch.int32)

    merged_text = k2.to_str_simple(merged)
    states_num = merged.shape[0]
    last_state = states_num - 1

    # The prepended arc goes straight from the start state to the last
    # state with label -1.
    out_lines = ["0 {} -1 0 0".format(last_state)]
    for raw_line in merged_text.strip().split("\n"):
        fields = raw_line.strip().split(" ")
        if len(fields) == 5:
            # Arc line: src dst label aux_label score.
            if int(fields[1]) == last_state:
                fields[1] = '0'
            if int(fields[2]) == -1:
                fields[2] = '0'
        out_lines.append(" ".join(fields))

    rewritten = k2.Fsa.from_str("\n".join(out_lines), num_aux_labels=1)
    return k2.invert(rewritten)
def test(self):
    """Check ``k2.intersect_device`` for all map / sorted-match settings.

    For every device the test covers both an identity ``b_to_a_map`` (two
    copies of ``a_fsa``) and a shared one (both b FSAs map to the single
    ``a_fsa``), each with ``sorted_match_a`` on and off. Results are
    connected on CPU, compared as text, and gradients are checked.
    """
    # recognizes (0|1)(0|2)
    s1 = '''
        0 1 0 0.1
        0 1 1 0.2
        1 2 0 0.4
        1 2 2 0.3
        2 3 -1 0.5
        3
    '''

    # recognizes 02*
    s2 = '''
        0 1 0 1
        1 1 2 2
        1 2 -1 3
        2
    '''

    # recognizes 1*0
    s3 = '''
        0 0 1 10
        0 1 0 20
        1 2 -1 30
        2
    '''

    # Same order as before: (True, True), (False, True), (True, False),
    # (False, False) for (use_identity_map, sorted_match_a).
    settings = [(True, True), (False, True), (True, False), (False, False)]

    for device in self.devices:
        for use_identity_map, sorted_match_a in settings:
            # Fresh FSAs per setting so gradients never accumulate across
            # iterations.
            a_fsa = k2.Fsa.from_str(s1).to(device)
            b_fsa_1 = k2.Fsa.from_str(s2).to(device)
            b_fsa_2 = k2.Fsa.from_str(s3).to(device)

            a_fsa.requires_grad_(True)
            b_fsa_1.requires_grad_(True)
            b_fsa_2.requires_grad_(True)

            b_fsas = k2.create_fsa_vec([b_fsa_1, b_fsa_2])
            if use_identity_map:
                a_fsas = k2.create_fsa_vec([a_fsa, a_fsa])
                b_to_a_map = torch.tensor([0, 1],
                                          dtype=torch.int32).to(device)
            else:
                a_fsas = k2.create_fsa_vec([a_fsa])
                b_to_a_map = torch.tensor([0, 0],
                                          dtype=torch.int32).to(device)

            c_fsas = k2.intersect_device(a_fsas, b_fsas, b_to_a_map,
                                         sorted_match_a)
            assert c_fsas.shape == (2, None, None)

            c_fsas = k2.connect(c_fsas.to('cpu'))
            # c_fsas[0] recognizes: 02
            # c_fsas[1] recognizes: 10

            actual_str_0 = k2.to_str_simple(c_fsas[0])
            expected_str_0 = '\n'.join(
                ['0 1 0 1.1', '1 2 2 2.3', '2 3 -1 3.5', '3'])
            assert actual_str_0.strip() == expected_str_0

            actual_str_1 = k2.to_str_simple(c_fsas[1])
            expected_str_1 = '\n'.join(
                ['0 1 1 10.2', '1 2 0 20.4', '2 3 -1 30.5', '3'])
            assert actual_str_1.strip() == expected_str_1

            loss = c_fsas.scores.sum()
            (-loss).backward()
            # The final arc of a_fsa is used by both results, hence -2.
            assert torch.allclose(
                a_fsa.grad,
                torch.tensor([-1, -1, -1, -1, -2]).to(a_fsa.grad))
            assert torch.allclose(
                b_fsa_1.grad,
                torch.tensor([-1, -1, -1]).to(b_fsa_1.grad))
            assert torch.allclose(
                b_fsa_2.grad,
                torch.tensor([-1, -1, -1]).to(b_fsa_2.grad))
def test_single_fsa(self):
    """Check ``k2.remove_epsilon_self_loops`` on a single FSA.

    Verifies the printed result, that non-tensor attributes are copied,
    that tensor and ragged attributes are re-indexed by the arc map
    [0, 1, 4, 5], and that gradients of ``float_attr`` and ``scores`` flow
    back to the kept arcs.
    """
    # Indexes of the input arcs that survive.
    kept_arcs = [0, 1, 4, 5]

    for device in self.devices:
        # See https://git.io/JY7r4
        s = '''
            0 1 0 0.1
            0 2 0 0.2
            0 0 0 0.3
            1 1 0 0.4
            1 2 0 0.5
            2 3 -1 0.6
            3
        '''
        src = k2.Fsa.from_str(s).to(device).requires_grad_(True)
        # Independent leaf copy used to build the reference gradient.
        scores_copy = src.scores.detach().clone().requires_grad_(True)

        src.attr1 = "hello"
        src.attr2 = "k2"
        float_attr = torch.tensor([0.1, 0.2, 0.3, 4, 5, 6],
                                  dtype=torch.float32,
                                  requires_grad=True,
                                  device=device)

        # The FSA gets its own leaf so its .grad is separate from
        # float_attr.grad.
        src.float_attr = float_attr.detach().clone().requires_grad_(True)
        src.int_attr = torch.tensor([1, 2, 3, 4, 5, 6],
                                    dtype=torch.int32,
                                    device=device)
        src.ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50],
                                           [60, 70], [80], [],
                                           [0]]).to(device)

        dest = k2.remove_epsilon_self_loops(src)
        # arc map is [0, 1, 4, 5]

        # See https://git.io/JY7oC
        expected_fsa = k2.Fsa.from_str('''
            0 1 0 0.1
            0 2 0 0.2
            1 2 0 0.5
            2 3 -1 0.6
            3
        ''')
        assert k2.to_str_simple(dest) == k2.to_str_simple(
            expected_fsa), f'{str(dest)}\n{str(expected_fsa)}'

        assert dest.attr1 == src.attr1
        assert dest.attr2 == src.attr2

        expected_int_attr = torch.tensor([1, 2, 5, 6],
                                         dtype=torch.int32,
                                         device=device)
        assert torch.all(torch.eq(dest.int_attr, expected_int_attr))

        expected_ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50], [],
                                                [0]]).to(device)
        assert dest.ragged_attr == expected_ragged_attr

        # Build the references element by element so autograd links them
        # to float_attr / scores_copy.
        expected_float_attr = torch.empty_like(dest.float_attr)
        for out_idx, in_idx in enumerate(kept_arcs):
            expected_float_attr[out_idx] = float_attr[in_idx]
        assert torch.all(torch.eq(dest.float_attr, expected_float_attr))

        expected_scores = torch.empty_like(dest.scores)
        for out_idx, in_idx in enumerate(kept_arcs):
            expected_scores[out_idx] = scores_copy[in_idx]
        assert torch.all(torch.eq(dest.scores, expected_scores))

        scale = torch.tensor([10, 20, 30, 40]).to(float_attr)

        (dest.float_attr * scale).sum().backward()
        (expected_float_attr * scale).sum().backward()
        assert torch.all(torch.eq(src.float_attr.grad, float_attr.grad))

        (dest.scores * scale).sum().backward()
        (expected_scores * scale).sum().backward()
        assert torch.all(torch.eq(src.scores.grad, scores_copy.grad))
def test_fsa_vec(self):
    """Check ``k2.remove_epsilon_self_loops`` on an FsaVec of two FSAs.

    Verifies the printed result of each member, that non-tensor attributes
    of both inputs reach the output, that tensor and ragged attributes are
    re-indexed by the arc map [0, 1, 4, 5, 6, 7, 10, 11], and that
    gradients of ``float_attr`` and ``scores`` flow back to the kept arcs.
    """
    # Per-FSA indexes of the arcs that survive.
    kept_arcs = [0, 1, 4, 5]

    for device in self.devices:
        # See https://git.io/JY7r4
        s = '''
            0 1 0 0.1
            0 2 0 0.2
            0 0 0 0.3
            1 1 0 0.4
            1 2 0 0.5
            2 3 -1 0.6
            3
        '''
        fsa1 = k2.Fsa.from_str(s).to(device).requires_grad_(True)
        scores_copy1 = fsa1.scores.detach().clone().requires_grad_(True)

        fsa1.attr1 = "hello"
        float_attr1 = torch.tensor([0.1, 0.2, 0.3, 4, 5, 6],
                                   dtype=torch.float32,
                                   requires_grad=True,
                                   device=device)
        # Give the FSA its own leaf tensor. If the FSA shared the very
        # same tensor as the reference, both backward passes below would
        # accumulate into one .grad and the gradient comparison would be
        # trivially true.
        fsa1.float_attr = float_attr1.detach().clone().requires_grad_(True)
        fsa1.int_attr = torch.tensor([1, 2, 3, 4, 5, 6],
                                     dtype=torch.int32,
                                     device=device)
        fsa1.ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50],
                                            [60, 70], [80], [],
                                            [0]]).to(device)

        fsa2 = k2.Fsa.from_str(s).to(device).requires_grad_(True)
        scores_copy2 = fsa2.scores.detach().clone().requires_grad_(True)

        fsa2.attr2 = "k2"
        float_attr2 = torch.tensor([1, 2, 3, 40, 50, 60],
                                   dtype=torch.float32,
                                   requires_grad=True,
                                   device=device)
        # Separate leaf for the same reason as fsa1.float_attr above.
        fsa2.float_attr = float_attr2.detach().clone().requires_grad_(True)
        fsa2.int_attr = torch.tensor([10, 20, 30, 4, 5, 6],
                                     dtype=torch.int32,
                                     device=device)
        fsa2.ragged_attr = k2.RaggedTensor([[100, 200], [300, 400, 500],
                                            [600, 700], [800], [22],
                                            [33, 55]]).to(device)

        src = k2.create_fsa_vec([fsa1, fsa2])
        dest = k2.remove_epsilon_self_loops(src)
        # arc map is [0, 1, 4, 5, 6, 7, 10, 11]

        # See https://git.io/JY7oC
        expected_fsa = k2.Fsa.from_str('''
            0 1 0 0.1
            0 2 0 0.2
            1 2 0 0.5
            2 3 -1 0.6
            3
        ''')
        assert k2.to_str_simple(dest[0]) == k2.to_str_simple(expected_fsa)
        assert k2.to_str_simple(dest[1]) == k2.to_str_simple(expected_fsa)

        assert dest.attr1 == fsa1.attr1
        assert dest.attr2 == fsa2.attr2

        expected_int_attr = torch.tensor([1, 2, 5, 6, 10, 20, 5, 6],
                                         dtype=torch.int32,
                                         device=device)
        assert torch.all(torch.eq(dest.int_attr, expected_int_attr))

        expected_ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50], [],
                                                [0], [100, 200],
                                                [300, 400, 500], [22],
                                                [33, 55]]).to(device)
        assert dest.ragged_attr == expected_ragged_attr

        # Build the references element by element so autograd links them
        # to the original leaves; fsa1 fills slots 0-3, fsa2 slots 4-7.
        expected_float_attr = torch.empty_like(dest.float_attr)
        for out_idx, in_idx in enumerate(kept_arcs):
            expected_float_attr[out_idx] = float_attr1[in_idx]
            expected_float_attr[out_idx + 4] = float_attr2[in_idx]
        assert torch.all(torch.eq(dest.float_attr, expected_float_attr))

        expected_scores = torch.empty_like(dest.scores)
        for out_idx, in_idx in enumerate(kept_arcs):
            expected_scores[out_idx] = scores_copy1[in_idx]
            expected_scores[out_idx + 4] = scores_copy2[in_idx]
        assert torch.all(torch.eq(dest.scores, expected_scores))

        scale = torch.tensor([10, 20, 30, 40, 50, 60, 70,
                              80]).to(dest.float_attr)

        (dest.float_attr * scale).sum().backward()
        (expected_float_attr * scale).sum().backward()
        assert torch.all(torch.eq(fsa1.float_attr.grad, float_attr1.grad))
        assert torch.all(torch.eq(fsa2.float_attr.grad, float_attr2.grad))

        (dest.scores * scale).sum().backward()
        (expected_scores * scale).sum().backward()
        assert torch.all(torch.eq(fsa1.scores.grad, scores_copy1.grad))
        assert torch.all(torch.eq(fsa2.scores.grad, scores_copy2.grad))
def test(self):
    """Check ``k2.replace_fsa`` with three source FSAs and one index FSA.

    ``k2.replace_fsa(src, index, 1)`` is compared against an expected
    textual FSA. The custom ``aux_label`` attribute of ``index`` must
    appear on the output with 0 on the other arcs, and every arc of both
    ``index`` and ``src`` must receive a gradient of -1.
    """
    s1 = '''
        0 1 11 11
        0 2 12 12
        0 3 13 13
        1 4 -1 0
        2 4 -1 0
        3 4 -1 0
        4
    '''
    s2 = '''
        0 1 21 21
        0 2 22 22
        1 2 23 23
        1 3 -1 0
        2 3 -1 0
        3
    '''
    s3 = '''
        0 1 31 31
        1 2 32 32
        1 3 33 33
        2 4 -1 0
        3 4 -1 0
        4
    '''
    s0 = '''
        0 1 1 1
        0 2 2 2
        1 3 4 4
        2 3 3 3
        2 4 -1 0
        3 4 -1 0
        4
    '''

    for device in self.devices:
        fsa1 = k2.Fsa.from_str(s1)
        fsa2 = k2.Fsa.from_str(s2)
        fsa3 = k2.Fsa.from_str(s3)

        src = k2.create_fsa_vec([fsa1, fsa2, fsa3]).to(device)
        src.requires_grad_(True)

        index = k2.Fsa.from_str(s0).to(device)
        index.requires_grad_(True)
        # Note the attribute is deliberately named `aux_label` (singular);
        # the same name is read back from `dest` below.
        index.aux_label = torch.tensor([1, 2, 3, 4, 5, 6],
                                       dtype=torch.int32,
                                       device=device)

        dest = k2.replace_fsa(src, index, 1)

        actual_str = k2.to_str_simple(dest)
        expected_str = '\n'.join([
            '0 1 0 1', '0 5 0 2', '1 2 11 11', '1 3 12 12', '1 4 13 13',
            '2 8 0 0', '3 8 0 0', '4 8 0 0', '5 6 21 21', '5 7 22 22',
            '6 7 23 23', '6 9 0 0', '7 9 0 0', '8 14 4 4', '9 10 0 3',
            '9 15 -1 0', '10 11 31 31', '11 12 32 32', '11 13 33 33',
            '12 14 0 0', '13 14 0 0', '14 15 -1 0', '15'
        ])
        assert actual_str.strip() == expected_str

        assert torch.all(
            torch.eq(
                dest.aux_label,
                torch.tensor([
                    1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 5, 0, 0,
                    0, 0, 0, 6
                ]).to(device).to(torch.int32)))

        loss = dest.scores.sum()
        (-loss).backward()
        assert torch.allclose(
            index.grad,
            torch.tensor([-1, -1, -1, -1, -1, -1]).to(index.grad))
        assert torch.allclose(
            src.grad,
            torch.tensor([
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                -1
            ]).to(src.grad))
def test_acceptor_from_str(self):
    """Parse a k2-format acceptor and check arcs, scores and labels.

    The same text is parsed four ways (defaults, ``acceptor=True``,
    ``num_aux_labels=0`` and ``aux_label_names=[]``); every variant must
    give the same result. Also checks in-place score updates and that
    assigning ``labels`` fixes up final-arc labels.
    """
    s = '''
        0 1 2 -1.2
        0 2 10 -2.2
        1 6 -1 -3.2
        1 3 3 -4.2
        2 6 -1 -5.2
        2 4 2 -6.2
        3 6 -1 -7.2
        5 0 1 -8.2
        6
    '''
    expected_str = '''
        0 1 2 -1.2
        0 2 10 -2.2
        1 6 -1 -3.2
        1 3 3 -4.2
        2 6 -1 -5.2
        2 4 2 -6.2
        3 6 -1 -7.2
        5 0 1 -8.2
        6
    '''
    # Four equivalent ways of requesting an acceptor.
    parse_options = (
        {},
        {'acceptor': True},
        {'num_aux_labels': 0},
        {'aux_label_names': []},
    )
    for options in parse_options:
        fsa = k2.Fsa.from_str(s, **options)

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa))

        # All columns except the last one (the score bits).
        arcs = fsa.arcs.values()[:, :-1]
        assert isinstance(arcs, torch.Tensor)
        assert arcs.dtype == torch.int32
        assert arcs.device.type == 'cpu'
        assert arcs.shape == (8, 3), 'there should be 8 arcs'
        assert torch.all(
            torch.eq(arcs[0], torch.tensor([0, 1, 2], dtype=torch.int32)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2],
                         dtype=torch.float32))

        # In-place modification of the scores must be visible afterwards.
        fsa.scores *= -1

        assert torch.allclose(
            fsa.scores,
            torch.tensor([1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2],
                         dtype=torch.float32))

        # test that assigning to labels calls _k2.fix_final_labels as it
        # should.
        fsa.labels = torch.tensor([-1, 10, 0, 1, -1, 1, 0, 2],
                                  dtype=torch.int32)
        # Expected: arcs entering state 6 (indexes 2, 4, 6) end up with
        # label -1, and the -1 on arc 0 becomes 0.
        assert torch.all(
            torch.eq(
                fsa.labels,
                torch.tensor([0, 10, -1, 1, -1, 1, -1, 2],
                             dtype=torch.int32)))
def test_transducer3_from_openfst(self):
    """Parse an OpenFst-format transducer with three aux-label columns.

    The text is parsed once with ``num_aux_labels=3`` and once with
    explicit ``aux_label_names``. The expected values show that scores are
    the negated weights from the text and that the two added arcs carry
    -1 in every aux-label attribute.
    """
    s = '''
        0 1 2 22 33 44 -1.2
        0 2 10 100 101 102 -2.2
        1 6 1 16 17 18 -4.2
        1 3 3 33 34 35 -3.2
        2 6 2 26 27 28 -5.2
        2 4 2 22 23 24 -6.2
        3 6 3 36 37 38 -7.2
        5 0 1 50 51 52 -8.2
        7 -9.2
        6
    '''
    # Only the first aux-label column appears in the printed form.
    expected_str = '''
        0 1 2 22 -1.2
        0 2 10 100 -2.2
        1 6 1 16 -4.2
        1 3 3 33 -3.2
        2 6 2 26 -5.2
        2 4 2 22 -6.2
        3 6 3 36 -7.2
        5 0 1 50 -8.2
        6 8 -1 -1 0
        7 8 -1 -1 -9.2
        8
    '''
    # Two equivalent ways of requesting three aux-label columns.
    parse_options = (
        {'num_aux_labels': 3},
        {'aux_label_names': ['aux_labels', 'aux_labels2', 'aux_labels3']},
    )
    for options in parse_options:
        fsa = k2.Fsa.from_openfst(s, **options)

        assert fsa.aux_labels.dtype == torch.int32
        assert fsa.aux_labels.device.type == 'cpu'
        assert torch.all(
            torch.eq(
                fsa.aux_labels,
                torch.tensor([22, 100, 16, 33, 26, 22, 36, 50, -1, -1],
                             dtype=torch.int32)))

        assert fsa.aux_labels2.dtype == torch.int32
        assert fsa.aux_labels2.device.type == 'cpu'
        assert torch.all(
            torch.eq(
                fsa.aux_labels2,
                torch.tensor([33, 101, 17, 34, 27, 23, 37, 51, -1, -1],
                             dtype=torch.int32)))

        assert fsa.aux_labels3.dtype == torch.int32
        assert fsa.aux_labels3.device.type == 'cpu'
        assert torch.all(
            torch.eq(
                fsa.aux_labels3,
                torch.tensor([44, 102, 18, 35, 28, 24, 38, 52, -1, -1],
                             dtype=torch.int32)))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([1.2, 2.2, 4.2, 3.2, 5.2, 6.2, 7.2, 8.2, 0, 9.2],
                         dtype=torch.float32))

        assert _remove_leading_spaces(expected_str) == \
            _remove_leading_spaces(k2.to_str_simple(fsa, openfst=True))
def test(self):
    """Check that ``k2.arc_sort`` carries attributes and gradients along.

    With ``ret_arc_map=True`` the arc map must be [1, 0, 2]. Non-tensor
    attributes are copied, tensor and ragged attributes are permuted by
    the arc map, and gradients of ``float_attr`` and ``scores`` flow back
    to the matching input arcs.
    """
    s = '''
        0 1 2 0.1
        0 1 1 0.2
        1 2 -1 0.3
        2
    '''
    # Output arc i comes from input arc arc_order[i].
    arc_order = [1, 0, 2]

    for device in self.devices:
        src = k2.Fsa.from_str(s).to(device)
        src.requires_grad_(True)
        # Independent leaf copy used to build the reference gradient.
        scores_copy = src.scores.detach().clone().requires_grad_(True)

        src.attr1 = "hello"
        src.attr2 = "k2"
        float_attr = torch.tensor([0.1, 0.2, 0.3],
                                  dtype=torch.float32,
                                  requires_grad=True,
                                  device=device)

        # The FSA gets its own leaf so its .grad is separate from
        # float_attr.grad.
        src.float_attr = float_attr.detach().clone().requires_grad_(True)
        src.int_attr = torch.tensor([1, 2, 3],
                                    dtype=torch.int32,
                                    device=device)
        src.ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50],
                                           [60, 70]]).to(device)

        dest, arc_map = k2.arc_sort(src, ret_arc_map=True)

        assert dest.attr1 == src.attr1
        assert dest.attr2 == src.attr2

        expected_arc_map = torch.tensor(arc_order,
                                        dtype=torch.int32,
                                        device=device)
        assert torch.all(torch.eq(arc_map, expected_arc_map))

        actual_str = k2.to_str_simple(dest)
        expected_str = '\n'.join(
            ['0 1 1 0.2', '0 1 2 0.1', '1 2 -1 0.3', '2'])
        assert actual_str.strip() == expected_str

        expected_int_attr = torch.tensor([2, 1, 3],
                                         dtype=torch.int32,
                                         device=device)
        assert torch.all(torch.eq(dest.int_attr, expected_int_attr))

        expected_ragged_attr = k2.RaggedTensor([[30, 40, 50], [10, 20],
                                                [60, 70]]).to(device)
        assert dest.ragged_attr == expected_ragged_attr

        # Build the references element by element so autograd links them
        # to float_attr / scores_copy.
        expected_float_attr = torch.empty_like(dest.float_attr)
        for out_idx, in_idx in enumerate(arc_order):
            expected_float_attr[out_idx] = float_attr[in_idx]
        assert torch.all(torch.eq(dest.float_attr, expected_float_attr))

        expected_scores = torch.empty_like(dest.scores)
        for out_idx, in_idx in enumerate(arc_order):
            expected_scores[out_idx] = scores_copy[in_idx]
        assert torch.all(torch.eq(dest.scores, expected_scores))

        scale = torch.tensor([10, 20, 30]).to(float_attr)

        (dest.float_attr * scale).sum().backward()
        (expected_float_attr * scale).sum().backward()
        assert torch.all(torch.eq(src.float_attr.grad, float_attr.grad))

        (dest.scores * scale).sum().backward()
        (expected_scores * scale).sum().backward()
        assert torch.all(torch.eq(src.scores.grad, scores_copy.grad))