def test(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 2 3 0.3
        1 3 -1 0.4
        2 3 -1 0.5
        2 1 5 0.55
        3
    '''
    s1 = '''
        0 1 -1 0.6
        1
    '''
    s2 = '''
        0 1 6 0.7
        1 0 7 0.8
        1 0 8 0.9
        1 2 -1 1.0
        2
    '''
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda', 0))
    for device in devices:
        fsa0 = k2.Fsa.from_str(s0)
        fsa1 = k2.Fsa.from_str(s1)
        fsa2 = k2.Fsa.from_str(s2)
        fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2]).to(device)
        fsa = k2.union(fsa_vec)
        assert torch.allclose(
            fsa.arcs.values()[:, :3],
            torch.tensor([
                [0, 1, 0],  # fsa 0
                [0, 4, 0],  # fsa 1
                [0, 5, 0],  # fsa 2
                # now for fsa0
                [1, 2, 1],
                [1, 3, 2],
                [2, 3, 3],
                [2, 7, -1],
                [3, 7, -1],
                [3, 2, 5],
                # fsa1
                [4, 7, -1],
                # fsa2
                [5, 6, 6],
                [6, 5, 7],
                [6, 5, 8],
                [6, 7, -1]
            ]).to(torch.int32).to(device))
        assert torch.allclose(
            fsa.scores,
            torch.tensor([
                0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.7, 0.8,
                0.9, 1.0
            ]).to(device))
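# Before reading the expected arc table above, it may help to see k2.union in
# isolation. This is a minimal, hedged sketch (not part of the test suite),
# assuming only that k2 is importable; the two acceptor strings are invented
# for illustration.
import k2

sa = '''
    0 1 1 0.5
    1 2 -1 0.0
    2
'''
sb = '''
    0 1 2 0.25
    1 2 -1 0.0
    2
'''
fsa_vec = k2.create_fsa_vec([k2.Fsa.from_str(sa), k2.Fsa.from_str(sb)])
fsa = k2.union(fsa_vec)
# The union has a new start state 0 with one epsilon arc (label 0, score 0)
# per input FSA; the inputs' states are renumbered after it and their final
# states are merged into a single new final state.
print(fsa.arcs.values()[:, :3])
print(fsa.scores)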
def lexicon_fst(args):
    '''
    This program creates lexicon.fst.pdf and lexicon.fst.txt based on
    args.word_file.

    input:
        args: argparse.Namespace
    return:
        lexicon: k2.Fsa, the lexicon fst
    output:
        lexicon.fst.txt and lexicon.fst.pdf in args.data_directory

    The lexicon fst compresses repeated chars in the emission fst.
    '''
    symbols_str = symboletable(args)
    symbol_pairs = symbols_str.split('\n')
    num_noneps = len(symbol_pairs) - 1

    symbol2fst = [None]  # <eps> has no fst
    for i in range(1, num_noneps + 1):
        # An fst that accepts one or more repetitions of symbol i and
        # outputs it exactly once (the self-loop on state 1 maps the
        # extra repetitions to <eps>).
        s = '''
            0 1 %d %d 0.0
            1 1 %d 0 0.0
            1 2 -1 -1 0.0
            2
        ''' % (i, i, i)
        g = k2.Fsa.from_str(s, acceptor=False)
        symbol2fst.append(g)

    fst_vec = k2.create_fsa_vec(symbol2fst[1:])
    fst_union = k2.union(fst_vec)
    lexicon = k2.closure(fst_union)
    lexicon.draw(os.path.join(args.data_directory, 'lexicon.fst.pdf'),
                 title='lexicon')
    # lexicon.symbols = k2.SymbolTable.from_str(symbols_str)
    # lexicon.aux_symbols = k2.SymbolTable.from_str(symbols_str)
    with open(os.path.join(args.data_directory, 'lexicon.fst.txt'), 'w') as f:
        f.write(k2.to_str(lexicon))
    return lexicon
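# A hedged sketch of how lexicon_fst might be invoked. The Namespace fields
# word_file and data_directory come from the function above, but the concrete
# paths are hypothetical, and symboletable(args) is assumed to be defined in
# the same module.
import argparse

args = argparse.Namespace(word_file='data/words.txt',
                          data_directory='data')
lexicon = lexicon_fst(args)
# lexicon.fst.pdf and lexicon.fst.txt are written into data/ as side effects.
print(lexicon)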
def test(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 2 3 0.3
        1 3 -1 0.4
        2 3 -1 0.5
        2 1 5 0.55
        3
    '''
    s1 = '''
        0 1 -1 0.6
        1
    '''
    s2 = '''
        0 1 6 0.7
        1 0 7 0.8
        1 0 8 0.9
        1 2 -1 1.0
        2
    '''
    fsa0 = k2.Fsa.from_str(s0)
    fsa1 = k2.Fsa.from_str(s1)
    fsa2 = k2.Fsa.from_str(s2)
    fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])
    fsa = k2.union(fsa_vec)
    dot = k2.to_dot(fsa)
    dot.render('/tmp/fsa', format='pdf')
    # the fsa is saved to /tmp/fsa.pdf
    print(fsa)
def test_autograd(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 3 -1 0.3
        1 2 2 0.4
        2 3 -1 0.5
        3
    '''
    s1 = '''
        0 2 -1 0.6
        0 1 1 0.7
        1 2 -1 0.8
        2
    '''
    s2 = '''
        0 1 1 1.1
        1 2 -1 1.2
        2
    '''
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda', 0))
    for device in devices:
        fsa0 = k2.Fsa.from_str(s0).to(device).requires_grad_(True)
        fsa1 = k2.Fsa.from_str(s1).to(device).requires_grad_(True)
        fsa2 = k2.Fsa.from_str(s2).to(device).requires_grad_(True)

        fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])
        fsa = k2.union(fsa_vec)
        fsa_vec = k2.create_fsa_vec([fsa])
        log_like = k2.get_tot_scores(fsa_vec,
                                     log_semiring=True,
                                     use_double_scores=False)
        # expected log_like and gradients are computed using gtn.
        # See https://bit.ly/35uVaUv
        log_like.backward()
        expected_log_like = torch.tensor([3.1136]).to(log_like)
        assert torch.allclose(log_like, expected_log_like)

        expected_grad_fsa0 = torch.tensor([
            0.18710044026374817, 0.08949274569749832, 0.06629786640405655,
            0.12080258131027222, 0.21029533445835114
        ]).to(device)

        expected_grad_fsa1 = torch.tensor([
            0.08097638934850693, 0.19916976988315582, 0.19916976988315582
        ]).to(device)

        expected_grad_fsa2 = torch.tensor(
            [0.4432605803012848, 0.4432605803012848]).to(device)

        assert torch.allclose(fsa0.grad, expected_grad_fsa0)
        assert torch.allclose(fsa1.grad, expected_grad_fsa1)
        assert torch.allclose(fsa2.grad, expected_grad_fsa2)
def _construct_f(fsa_vec: k2.Fsa) -> k2.Fsa:
    num_fsa = fsa_vec.shape[0]
    union = k2.union(fsa_vec)
    # The first num_fsa arcs of the union leave the new start state;
    # give the arc entering fsa i the aux_label i + 1, and all other
    # arcs aux_label 0 (epsilon).
    union.aux_labels = torch.zeros(union.num_arcs, dtype=torch.int32)
    union.aux_labels[0:num_fsa] = torch.tensor(list(range(1, 1 + num_fsa)),
                                               dtype=torch.int32)
    union_str = k2.to_str_simple(union)
    states_num = union.shape[0]

    new_str_array = []
    new_str_array.append("0 {} -1 0 0".format(states_num - 1))
    for line in union_str.strip().split("\n"):
        tokens = line.strip().split(" ")
        if len(tokens) == 5:
            # Redirect arcs that entered the final state back to the start
            # state and replace their label -1 with epsilon (0).
            tokens[1] = '0' if int(tokens[1]) == states_num - 1 else tokens[1]
            tokens[2] = '0' if int(tokens[2]) == -1 else tokens[2]
        new_str_array.append(" ".join(tokens))
    new_str = "\n".join(new_str_array)

    new_fsa = k2.Fsa.from_str(new_str, num_aux_labels=1)
    new_fsa_invert = k2.invert(new_fsa)
    return new_fsa_invert
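# A hedged usage sketch of _construct_f: pass it an FsaVec and it returns the
# inverted, looped union built above. The two toy acceptors are invented for
# illustration.
import k2

s_a = '''
    0 1 10 0.0
    1 2 -1 0.0
    2
'''
s_b = '''
    0 1 20 0.0
    1 2 -1 0.0
    2
'''
fsa_vec = k2.create_fsa_vec([k2.Fsa.from_str(s_a), k2.Fsa.from_str(s_b)])
f = _construct_f(fsa_vec)
# After k2.invert, the labels 1..num_fsa select which component FSA to enter,
# and the original labels appear as aux_labels.
print(f)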
def test(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 2 3 0.3
        1 3 -1 0.4
        2 3 -1 0.5
        2 1 5 0.55
        3
    '''
    s1 = '''
        0 1 -1 0.6
        1
    '''
    s2 = '''
        0 1 6 0.7
        1 0 7 0.8
        1 0 8 0.9
        1 2 -1 1.0
        2
    '''
    for device in self.devices:
        fsa0 = k2.Fsa.from_str(s0)
        fsa1 = k2.Fsa.from_str(s1)
        fsa2 = k2.Fsa.from_str(s2)
        fsa0.tensor_attr = torch.tensor([1, 2, 3, 4, 5, 6],
                                        dtype=torch.int32,
                                        device=device)
        fsa0.ragged_tensor_attr = k2.RaggedTensor(
            fsa0.tensor_attr.unsqueeze(-1))
        fsa1.tensor_attr = torch.tensor([7],
                                        dtype=torch.int32,
                                        device=device)
        fsa1.ragged_tensor_attr = k2.RaggedTensor(
            fsa1.tensor_attr.unsqueeze(-1))
        fsa2.tensor_attr = torch.tensor([8, 9, 10, 11],
                                        dtype=torch.int32,
                                        device=device)
        fsa2.ragged_tensor_attr = k2.RaggedTensor(
            fsa2.tensor_attr.unsqueeze(-1))

        fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2]).to(device)
        fsa = k2.union(fsa_vec)

        expected_tensor_attr = torch.tensor(
            [0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]).to(fsa.tensor_attr)
        assert torch.all(torch.eq(fsa.tensor_attr, expected_tensor_attr))

        expected_ragged_tensor_attr = k2.RaggedTensor(
            expected_tensor_attr.unsqueeze(-1)).remove_values_eq(0)
        assert str(expected_ragged_tensor_attr) == str(
            fsa.ragged_tensor_attr)

        assert torch.allclose(
            fsa.arcs.values()[:, :3],
            torch.tensor([
                [0, 1, 0],  # fsa 0
                [0, 4, 0],  # fsa 1
                [0, 5, 0],  # fsa 2
                # now for fsa0
                [1, 2, 1],
                [1, 3, 2],
                [2, 3, 3],
                [2, 7, -1],
                [3, 7, -1],
                [3, 2, 5],
                # fsa1
                [4, 7, -1],
                # fsa2
                [5, 6, 6],
                [6, 5, 7],
                [6, 5, 8],
                [6, 7, -1]
            ]).to(torch.int32).to(device))
        assert torch.allclose(
            fsa.scores,
            torch.tensor([
                0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.7, 0.8,
                0.9, 1.0
            ]).to(device))