class TestCerFirst(unittest.TestCase):
    """Exercise ``WordTokenizer.tokenize`` on short phrases around "into".

    Each phrase is tokenized once with an empty ``brackets_list`` and once
    with ``["()"]``; the expected token lists are the same in both cases.
    """

    def setUp(self) -> None:
        """Build a fresh ``WordTokenizer`` before every test."""
        self.word_tokenizer = WordTokenizer()

    def _assert_tokens(self, phrase, brackets, expected):
        """Tokenize *phrase* with *brackets* and compare with *expected*."""
        tokens = self.word_tokenizer.tokenize(
            token_string=phrase, brackets_list=brackets)
        self.assertEqual(tokens, expected)

    def test_into(self):
        """A single word with no brackets configured stays one token."""
        self._assert_tokens("into", [], ["into"])

    def test_into_2(self):
        """Two space-separated words with no brackets give two tokens."""
        self._assert_tokens("in to", [], ["in", "to"])

    def test_into_3(self):
        """A three-word phrase with no brackets gives three tokens."""
        self._assert_tokens(
            "into the monitor", [], ["into", "the", "monitor"])

    def test_into_4(self):
        """A single word stays one token when "()" is in brackets_list."""
        self._assert_tokens("into", ["()"], ["into"])

    def test_into_5(self):
        """Two words give two tokens when "()" is in brackets_list."""
        self._assert_tokens("in to", ["()"], ["in", "to"])

    def test_into_6(self):
        """Three words give three tokens when "()" is in brackets_list."""
        self._assert_tokens(
            "into the monitor", ["()"], ["into", "the", "monitor"])
def update_alignment_result(self, alignment_result, process_output_digit):
    """Look for a lower-distance alignment by expanding selected tokens.

    One side of ``alignment_result`` is scanned token by token with
    ``self.our_is_digit``. Tokens for which it returns a truthy value are
    replaced by the alternatives it yields (each alternative is run through
    ``WordTokenizer.tokenize`` with ``to_lower=True`` and
    ``remove_punctuation=True``); every other token is kept as its only
    option. All combinations produced by ``SimpleReferenceCombinationGenerator``
    are then scored against the untouched side, and the first combination
    reaching the smallest distance is re-aligned.

    NOTE(review): the exact shape of what ``our_is_digit`` returns is not
    visible here; the code only requires an iterable of iterables of
    strings when the value is truthy -- confirm against its definition.

    Args:
        alignment_result: Object providing ``get_outputs()``,
            ``get_reference()`` and ``calculate_three_kinds_of_distance()``.
        process_output_digit: When truthy the output tokens are expanded
            and compared with the reference tokens; otherwise the
            reference tokens are expanded and compared with the outputs.

    Returns:
        The ``alignment_result`` attribute of the new calculation, or
        ``None`` when no token was expanded or when no combination has a
        distance strictly smaller than the first value returned by
        ``calculate_three_kinds_of_distance()``.
    """
    tokenizer = WordTokenizer()
    distance_scorer = UKKLevenshteinDistanceCalculator(
        tokenizer=None, get_alignment_result=False)

    output_tokens = alignment_result.get_outputs()
    reference_tokens = alignment_result.get_reference()
    # Distance to beat: only strictly smaller candidates are accepted.
    best_distance = alignment_result.calculate_three_kinds_of_distance()[0]

    combination_generator = SimpleReferenceCombinationGenerator()

    def run(scorer, candidate):
        # The candidate takes the place of whichever side was expanded;
        # the other side is passed through unchanged.
        if process_output_digit:
            return scorer.get_result_from_list(reference_tokens, candidate)
        return scorer.get_result_from_list(candidate, output_tokens)

    tokens_to_expand = (
        output_tokens if process_output_digit else reference_tokens)

    found_expansion = False
    for token in tokens_to_expand:
        expansions = self.our_is_digit(token)
        if not expansions:
            # Nothing to expand: the token itself is the single option.
            combination_generator.add_new_token_options([token])
            continue
        found_expansion = True
        for option_group in expansions:
            combination_generator.add_new_token_options([
                tokenizer.tokenize(
                    option, to_lower=True, remove_punctuation=True)
                for option in option_group
            ])

    if not found_expansion:
        return None

    best_candidate = None
    for candidate in combination_generator.get_all_reference():
        candidate_distance = run(distance_scorer, candidate).distance
        if candidate_distance < best_distance:
            best_distance = candidate_distance
            best_candidate = candidate

    if best_candidate is None:
        return None

    # Second pass with alignment enabled, only for the winning candidate.
    alignment_scorer = UKKLevenshteinDistanceCalculator(
        tokenizer=None, get_alignment_result=True)
    return run(alignment_scorer, best_candidate).alignment_result