def test_get_context_regex(self):
    """Print the context regexes derived from a sample ``Rule``.

    Builds a ``Rule`` from four feature-bundle lists (with
    ``obligatory=True``), then feeds its left and right context feature
    bundle lists to ``get_context_regex`` and prints both results.  The
    test makes no assertions; it only fails if one of the calls raises.
    """
    rule = Rule(
        [{"voice": "-"}],
        [{"voice": "-"}],
        [{"cons": "+"}, {"voice": "-"}],
        [{"low": "+"}],
        obligatory=True,
    )

    left_bundles, right_bundles = (
        rule.left_context_feature_bundle_list,
        rule.right_context_feature_bundle_list,
    )

    # Compute both regexes first, then print them, so that nothing is
    # printed if either computation raises.
    regexes = [get_context_regex(bundles) for bundles in (left_bundles, right_bundles)]
    for regex_text in regexes:
        print(regex_text)
def test_get_context_regex(self):
    """Check the regex built for a single ``{'cons': '+'}`` bundle.

    The regex string returned by ``get_context_regex`` is compiled with
    ``str2regexp`` and must accept the word ``"t"`` while rejecting ``"a"``
    and ``"tt"``.  Afterwards the test checks that the regexes compiled
    from ``"(a|b|c)"`` and ``"a+b+c"`` compare as truthy via ``compare``
    (presumably meaning the two notations are equivalent; confirm against
    the regex library in use).
    """
    bundles = FeatureBundleList([{'cons': '+'}], is_one_item_list=True)
    pattern = get_context_regex(bundles)
    print(pattern)
    compiled = str2regexp(pattern)

    # (word, whether the compiled regex is expected to accept it)
    expectations = (("t", True), ("a", False), ("tt", False))
    for word, should_accept in expectations:
        accepted = compiled.evalWordP(word)
        print(accepted)
        assert bool(accepted) is should_accept

    pipe_notation = "(a|b|c)"
    plus_notation = "a+b+c"
    pipe_regex = str2regexp(pipe_notation)
    plus_regex = str2regexp(plus_notation)
    assert pipe_regex.compare(plus_regex)
def _get_left_context_dfa(self):
    """Return the left-context automaton, building and caching it on first use.

    Results are memoised in the module-level ``left_context_dfas`` mapping,
    keyed by ``str(self.left_context_feature_bundle_list)``; a cached entry
    is returned without any recomputation.

    Returns:
        The value produced by ``pyfst_from_dfa`` for the constructed DFA,
        or the previously cached value for the same key.
    """
    cache_key = str(self.left_context_feature_bundle_list)
    if cache_key in left_context_dfas:
        return left_context_dfas[cache_key]

    symbols = self.alphabet
    any_prefix = sigma_star_dfa_for_left_context

    # With no left context the "prefix followed by context" language is
    # just the prefix language; otherwise it is overridden below.
    prefix_then_context = any_prefix
    if self.left_context_feature_bundle_list:
        context_pattern = get_context_regex(self.left_context_feature_bundle_list)
        if configurations["LENGTHENING_FLAG"]:
            # Allow any number of trailing "Y" symbols after the context.
            context_pattern += "(Y)*"
        context_dfa = str2regexp(context_pattern, sigma=symbols).toDFA()
        left_bracket_set = set(LEFT_BRACKETS)
        context_ignoring_left = get_ignore_dfa(
            symbols | left_bracket_set, context_dfa, left_bracket_set
        )
        prefix_then_context = any_prefix.concat(context_ignoring_left)

    # Automaton over the left-bracket symbols, built from "(b1+b2+...)".
    bracket_pattern = "({})".format("+".join(LEFT_BRACKETS))
    bracket_dfa = get_dfa_from_regex(bracket_pattern, sigma=LEFT_BRACKETS)

    # Prefix-then-context strings, combined via `&` with the `~` of
    # "prefix followed by a left bracket" (presumably intersection with
    # the complement, i.e. excluding strings ending in a left bracket).
    prefix_then_bracket = any_prefix.concat(bracket_dfa)
    not_prefix_then_bracket = ~prefix_then_bracket
    context_without_bracket_end = prefix_then_context & not_prefix_then_bracket

    bracket_then_suffix = bracket_dfa.concat(any_prefix)
    iff_dfa = get_p_iff_s_dfa(context_without_bracket_end, bracket_then_suffix)
    iff_ignoring_right = get_ignore_dfa(
        symbols | set(BRACKETS), iff_dfa, set(RIGHT_BRACKETS)
    )

    result = pyfst_from_dfa(iff_ignoring_right)
    left_context_dfas[cache_key] = result
    return result
def get_context_dfa(context_features):
    """Return the DFA for the regex describing ``context_features``.

    The regex string comes from ``get_context_regex`` and is compiled with
    ``str2regexp`` over the set of symbols reported by
    ``SegmentTable().get_segments_symbols()``.

    Args:
        context_features: Value passed unchanged to ``get_context_regex``.

    Returns:
        The result of calling ``toDFA()`` on the compiled regex.
    """
    sigma = set(SegmentTable().get_segments_symbols())
    pattern = get_context_regex(context_features)
    return str2regexp(pattern, sigma=sigma).toDFA()