Beispiel #1
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

try:
    import property
except ModuleNotFoundError:
    from util import property

# Load the Sentence_Break property table.
# NOTE(review): presumably one entry per code point, indexed by code point --
# confirm against property.read.
SENTENCE_BREAK_PROPERTY = "data/ucd/auxiliary/SentenceBreakProperty.txt"
code_props = property.read(SENTENCE_BREAK_PROPERTY)

# Entries the reader left as None get the default name 'Other' (in place).
for index, name in enumerate(code_props):
    if name is None:
        code_props[index] = 'Other'

# Every distinct property name except 'Other', which is numbered separately.
prop_names = set(code_props)
prop_names.remove('Other')

# 'Other' is fixed at 0; the remaining names are numbered 1, 2, ... in
# sorted order.
prop_vals = {'Other': 0}
prop_vals.update(
    (name, value) for value, name in enumerate(sorted(prop_names), start=1)
)

Beispiel #2
0
# compatibility decompositions

import math

try:
    import property
    import unicode_data
except ModuleNotFoundError:
    from util import property
    from util import unicode_data

# Paths to the UCD data files read below.
WORD_BREAK_PROPERTY = "data/ucd/auxiliary/WordBreakProperty.txt"
PROP_LIST = "data/ucd/PropList.txt"
DERIVED_CORE_PROPERTIES = "data/ucd/DerivedCoreProperties.txt"

# The word-break file is read twice: once in the reader's default form and
# once with sets=True.
# NOTE(review): sets=True presumably yields a mapping from property name to
# the set of code points carrying it -- confirm against property.read.
code_props = property.read(WORD_BREAK_PROPERTY)
word_break_property = property.read(WORD_BREAK_PROPERTY, sets=True)

# Look up the White_Space entry of the PropList data by property name.
prop_list = property.read(PROP_LIST, sets=True)
white_space = prop_list['White_Space']

# Likewise, the Default_Ignorable_Code_Point entry of the derived core
# properties.
derived_core_properties = property.read(DERIVED_CORE_PROPERTIES, sets=True)
default_ignorable = derived_core_properties['Default_Ignorable_Code_Point']

# Walk every index of unicode_data.uchars, skipping entries that are None or
# whose `decomp` is None, and those for which decompose() returns None.
# NOTE(review): the loop body appears to continue beyond this excerpt; what
# is done with `d` is not visible here.
for code in range(len(unicode_data.uchars)):
    u = unicode_data.uchars[code]
    if u is None or u.decomp is None:
        continue
    d = unicode_data.decompose(code)
    if d is None:
        continue
Beispiel #3
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

try:
    import property
except ModuleNotFoundError:
    from util import property

# Inputs: the grapheme-break property table and the emoji data file, from
# which the Extended_Pictographic entries are taken.
EMOJI_DATA = "data/ucd/emoji/emoji-data.txt"
GRAPHEME_BREAK_PROPERTY = "data/ucd/auxiliary/GraphemeBreakProperty.txt"
code_props = property.read(GRAPHEME_BREAK_PROPERTY)
emoji_props = property.read(EMOJI_DATA, sets=True)

# Entries the reader left as None get the default name 'Other' (in place).
for i, prop in enumerate(code_props):
    if prop is None:
        code_props[i] = 'Other'

# Overlay Extended_Pictographic onto the table. Each such entry must still be
# 'Other'; anything else means the two data files disagree.
for i in emoji_props['Extended_Pictographic']:
    if code_props[i] != 'Other':
        # Raised explicitly instead of using `assert`, so the check is not
        # stripped under `python -O` and the failure names the code point.
        raise AssertionError(
            f"U+{i:04X} is Extended_Pictographic but already has grapheme "
            f"break property {code_props[i]!r}"
        )
    code_props[i] = 'Extended_Pictographic'

# Every distinct property name except 'Other', which is numbered separately.
prop_names = set(code_props)
prop_names.remove('Other')

# 'Other' always maps to value 0.
prop_vals = {'Other': 0}