예제 #1
0
                       }

# Table derived from DIACRITICS by invertdict() (presumably value -> key).
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.

# Breathing marks.
PNEUMA = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("ψιλὸν", "δασὺ")]
)
# Accents.
TONOS = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("βαρεῖα", "ὀξεῖα", "περισπωμένη")]
)
# Every transliterated base symbol: letters, other symbols and punctuation.
LETTERS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(LOWER_CASE_INVERSED.keys()))
    + regexstring_list(tuple(UPPER_CASE_INVERSED.keys()))
    + regexstring_list(tuple(OTHER_SYMBOLS_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)
HYPOGEGRAMMENE = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["ὑπογεγραμμένη"])]
)
DIALUTIKA = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["διαλυτικά"])]
)
MEKOS = isort_a_lstrings_bylen_nodup(
예제 #2
0
파일: basic.py 프로젝트: suizokukan/dchars
    "διαλυτικά": ":",
}

# Table derived from DIACRITICS by invertdict() (presumably value -> key).
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.

# Breathing marks.
PNEUMA = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("ψιλὸν", "δασὺ")]
)
# Accents.
TONOS = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("βαρεῖα", "ὀξεῖα", "περισπωμένη")]
)
# Every transliterated base symbol: letters, other symbols and punctuation.
LETTERS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(LOWER_CASE_INVERSED.keys())) +
    regexstring_list(tuple(UPPER_CASE_INVERSED.keys())) +
    regexstring_list(tuple(OTHER_SYMBOLS_INVERSED.keys())) +
    regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)
HYPOGEGRAMMENE = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("ὑπογεγραμμένη",)]
)
DIALUTIKA = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("διαλυτικά",)]
)
# Vowel length marks.
MEKOS = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[name]) for name in ("μακρόν", "βραχύ")]
)

PATTERN_TXT = (
    "((?P<trans_pneuma>({0}))?"
예제 #3
0
# Table derived from DIACRITICS by invertdict() (presumably value -> key).
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.

# Every transliterated base symbol: consonants (with and without nukta),
# vowels (plain and in hiatus), other symbols and punctuation.
BASE_CHAR = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(CONSONANTS_INVERSED.keys()))
    + regexstring_list(tuple(CONSONANTS_WITH_NUKTA_INVERSED.keys()))
    + regexstring_list(tuple(VOWELS_INVERSED.keys()))
    + regexstring_list(tuple(VOWELS_IN_HIATUS_INVERSED.keys()))
    + regexstring_list(tuple(OTHER_SYMBOLS_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)

ANUDATTA = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["DEVANAGARI STRESS SIGN ANUDATTA"])]
)

ACCENT = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["DEVANAGARI STRESS SIGN UDATTA"])]
)

ANUSVARA_CANDRABINDU = isort_a_lstrings_bylen_nodup(
    [
        re.escape(DIACRITICS[sign])
        for sign in ("DEVANAGARI SIGN ANUSVARA", "DEVANAGARI SIGN CANDRABINDU")
    ]
)
예제 #4
0
파일: basic.py 프로젝트: suizokukan/dchars
                "stress2"       : "/",
                "makron"        : "_",
                "upperdot"      : "+",
              }
# Table derived from DIACRITICS by invertdict() (presumably value -> key).
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.
UPPERDOT = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["upperdot"])]
)
MAKRON = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["makron"])]
)
STRESS = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[mark]) for mark in ("stressM1", "stress1", "stress2")]
)
# Every transliterated base symbol: letters and punctuation.
LETTERS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(LOWER_CASE_INVERSED.keys()))
    + regexstring_list(tuple(UPPER_CASE_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)

PATTERN_TXT = "((?P<base_char>({0}))" \
              "(?P<trans_stress>({1}))?" \
              "(?P<trans_makron>({2}))?" \
              "(?P<trans_upperdot>({3}))?" \
예제 #5
0
파일: basic.py 프로젝트: suizokukan/anceps
# E.g. CANTILATIONMARKS["HEBREW ACCENT ZAQEF GADOL"] = <HEBREW ACCENT ZAQEF GADOL>
# Each cantillation mark name is transliterated as the name itself wrapped in
# angle brackets.
CANTILATIONMARKS = {
    mark_name: "<" + mark_name + ">"
    for mark_name in symbols.SYMB_CANTILLATION_MARKS.keys()
}
CANTILATIONMARKS_INVERSED = invertdict(CANTILATIONMARKS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.

# Every transliterated base symbol: letters, other symbols and punctuation.
T_BASECHARS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(LETTERS_INVERSED.keys()))
    + regexstring_list(tuple(OTHER_SYMBOLS_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)
T_VOWELS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(VOWELS_INVERSED.keys()))
)
T_METHEGH = isort_a_lstrings_bylen_nodup(
    [re.escape(POINTS["HEBREW POINT METEG"])]
)
T_RAPHE = isort_a_lstrings_bylen_nodup(
    [re.escape(POINTS["HEBREW POINT RAFE"])]
)
T_SPECIALPOINTS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(SPECIALPOINTS_INVERSED.keys()))
)
T_CMARKS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(CANTILATIONMARKS_INVERSED.keys()))
)

PATTERN_TXT = "((?P<base_char>({0}))" \
              "(?P<trans_vowel>({1}))?" \
              "(?P<trans_methegh>({2}))?" \
예제 #6
0
파일: basic.py 프로젝트: suizokukan/dchars
# Transliteration string used for each diacritic.
DIACRITICS = {
    "stress1": "\\",
    "stress2": "/",
    "stress12": "/\\",
    "stress3": "+:",
    "cedilla": "+c",
}
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.
STRESS = isort_a_lstrings_bylen_nodup(
    [
        re.escape(DIACRITICS[mark])
        for mark in ("stress1", "stress2", "stress12", "stress3")
    ]
)
CEDILLA = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[mark]) for mark in ("cedilla",)]
)
# Every transliterated base symbol: letters and punctuation.
LETTERS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(LOWER_CASE_INVERSED.keys())) +
    regexstring_list(tuple(UPPER_CASE_INVERSED.keys())) +
    regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)

PATTERN_TXT = (
    "((?P<base_char>({0}))"
    "(?P<trans_stress>({1}))?"
    "(?P<trans_cedilla>({2}))?"
    ")".format(
예제 #7
0
파일: basic.py 프로젝트: suizokukan/anceps
                "short"         : "*",
                "long"          : "_",
                "diaeresis"     : "+",
              }
# Table derived from DIACRITICS by invertdict() (presumably value -> key).
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.
DIAERESIS = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["diaeresis"])]
)
LENGTH = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS[mark]) for mark in ("short", "long")]
)
STRESS = isort_a_lstrings_bylen_nodup(
    [re.escape(DIACRITICS["stress"])]
)
# Every transliterated base symbol: letters and punctuation.
LETTERS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(LOWER_CASE_INVERSED.keys()))
    + regexstring_list(tuple(UPPER_CASE_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)

PATTERN_TXT = "((?P<base_char>({0}))" \
              "(?P<trans_stress>({1}))?" \
              "(?P<trans_length>({2}))?" \
              "(?P<trans_diaeresis>({3}))?" \
              ")".format("|".join(prepare_list_to_strformat(LETTERS)),
예제 #8
0
파일: ewts.py 프로젝트: suizokukan/anceps
#
#  +1 (text->transliteration)
#  -1 (transliteration->text)
#
################################################################################
AVAILABLE_DIRECTIONS = (-1, +1)

################################################################################
# Transliteration patterns
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.

TRANS_DOT_OR_PLUS = isort_a_lstrings_bylen_nodup(
    regexstring_list(("+", "."))
)
TRANS_PUNCTUATION_AND_OTHER_SYMBOL = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(OTHER_SYMBOLS_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)
TRANS_CONSONANTS_AND_VOWELS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(CONSONANTS_INVERSED.keys()))
    + regexstring_list(tuple(VOWELS_INVERSED.keys()))
)
# NOTE: the trailing comma matters in the two single-item calls below.
# "(x)" is just x, so the former "(DIACRITICS[...])" handed the bare string to
# regexstring_list() while every other call in this section hands it a tuple of
# strings; a value longer than one character would presumably have been split
# into its individual characters.
TRANS_RNAM_BCAD = isort_a_lstrings_bylen_nodup(
    regexstring_list((DIACRITICS["SIGN RNAM BCAD"],))
)
TRANS_HALANTA = isort_a_lstrings_bylen_nodup(
    regexstring_list((DIACRITICS["MARK HALANTA"],))
)
TRANS_ANUSVARA_CANDRABINDU = isort_a_lstrings_bylen_nodup(
    regexstring_list(
        (
            DIACRITICS["SIGN RJES SU NGA RO"],
            DIACRITICS["SIGN NYI ZLA NAA DA"],
            DIACRITICS["SIGN SNA LDAN"],
        )
    )
)
예제 #9
0
################################################################################
# Transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut several complex characters into a list of complex
#          characters.
################################################################################

# The alternatives used to build the regex pattern strings have to be SORTED:
# (a|b|t|th) won't find 'th' in "theatre" but (th|a|b|t) will.
# Possible duplicates are removed from the result as well.

# Every transliterated base symbol: the chōonpu, hiragana and katakana (plain,
# dakuten and handakuten variants), other symbols and punctuation.
LETTERS = isort_a_lstrings_bylen_nodup(
    regexstring_list(tuple(CHOONPU.keys()))
    + regexstring_list(tuple(HIRAGANA_INVERSED.keys()))
    + regexstring_list(tuple(HIRAGANA_DAKUTEN_INVERSED.keys()))
    + regexstring_list(tuple(HIRAGANA_HANDAKUTEN_INVERSED.keys()))
    + regexstring_list(tuple(KATAKANA_INVERSED.keys()))
    + regexstring_list(tuple(KATAKANA_DAKUTEN_INVERSED.keys()))
    + regexstring_list(tuple(KATAKANA_HANDAKUTEN_INVERSED.keys()))
    + regexstring_list(tuple(OTHER_SYMBOLS_INVERSED.keys()))
    + regexstring_list(tuple(PUNCTUATION_INVERSED.keys()))
)

# Single named group whose alternatives are the entries of LETTERS.
PATTERN_TXT = (
    "(?P<base_char>({0}))"
    .format("|".join(prepare_list_to_strformat(LETTERS)))
    # reverse the effect of prepare_list_to_strformat()
    .replace("{{", "{")
    .replace("}}", "}")
)
PATTERN = re.compile(PATTERN_TXT)

PATTERN_TXT2 = "({0})".format("|".join(prepare_list_to_strformat(LETTERS)))

# we inverse the effect of prepare_list_to_strformat()