#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

try:
    import property
except ModuleNotFoundError:
    from util import property

SENTENCE_BREAK_PROPERTY = "data/ucd/auxiliary/SentenceBreakProperty.txt"

# Table returned by property.read(); entries that come back as None are
# relabelled 'Other' in place so every slot carries a property name.
code_props = property.read(SENTENCE_BREAK_PROPERTY)
for i, prop in enumerate(code_props):
    if prop is None:
        code_props[i] = 'Other'

# Distinct property names, with the 'Other' catch-all taken out because it is
# given the fixed value 0 below.
prop_names = set(code_props)
prop_names.remove('Other')

# 'Other' maps to 0; the remaining names are numbered 1, 2, ... in sorted order.
prop_vals = {'Other': 0}
prop_vals.update(
    (name, value) for value, name in enumerate(sorted(prop_names), start=1)
)
# compatibility decompositions import math try: import property import unicode_data except ModuleNotFoundError: from util import property from util import unicode_data WORD_BREAK_PROPERTY = "data/ucd/auxiliary/WordBreakProperty.txt" PROP_LIST = "data/ucd/PropList.txt" DERIVED_CORE_PROPERTIES = "data/ucd/DerivedCoreProperties.txt" code_props = property.read(WORD_BREAK_PROPERTY) word_break_property = property.read(WORD_BREAK_PROPERTY, sets=True) prop_list = property.read(PROP_LIST, sets=True) white_space = prop_list['White_Space'] derived_core_properties = property.read(DERIVED_CORE_PROPERTIES, sets=True) default_ignorable = derived_core_properties['Default_Ignorable_Code_Point'] for code in range(len(unicode_data.uchars)): u = unicode_data.uchars[code] if u is None or u.decomp is None: continue d = unicode_data.decompose(code) if d is None: continue
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

try:
    import property
except ModuleNotFoundError:
    from util import property

EMOJI_DATA = "data/ucd/emoji/emoji-data.txt"
GRAPHEME_BREAK_PROPERTY = "data/ucd/auxiliary/GraphemeBreakProperty.txt"

code_props = property.read(GRAPHEME_BREAK_PROPERTY)
emoji_props = property.read(EMOJI_DATA, sets=True)

# Entries that property.read() returned as None are relabelled 'Other' in place.
for i, prop in enumerate(code_props):
    if prop is None:
        code_props[i] = 'Other'

# Overlay Extended_Pictographic from the emoji data; each such entry must
# still be 'Other' at this point, otherwise the assertion stops the script.
for code in emoji_props['Extended_Pictographic']:
    current = code_props[code]
    assert current == 'Other'
    code_props[code] = 'Extended_Pictographic'

# Distinct property names, excluding the 'Other' catch-all.
prop_names = set(code_props)
prop_names.remove('Other')

# 'Other' is given the value 0.
prop_vals = {'Other': 0}