#!/usr/bin/env python
#coding:utf8
# Module-level setup for the '[unihan]' script: imports, logger, data file
# paths, numeric identifiers and the (initially empty) working containers.
import modules.log4py as log4py
import modules.char as char
import codecs
import string
import cPickle as pk
import sys
import traceback

log = log4py.log4py('[unihan]')

# Both paths are relative to the current working directory.
# Presumably file_src is read and file_write is written -- confirm against
# the code that uses them.
file_src = '../data/Unihandata.txt'
file_write = '../data/unihan3.txt'

# English rendering of the code-point table kept in the string literal below:
#   0x4E00-0x9FFF    CJK Unified Ideographs, commonly used characters,
#                    20992 in total (actually defined only up to 0x9FC3)
#   0x3400-0x4DFF    CJK Unified Ideographs Extension A, rarely used
#                    characters, 6656 in total
#   0x20000-0x2A6DF  CJK Unified Ideographs Extension B, rarely used
#                    characters of historical use, 42720 in total
#   0x2A700-0x2B73F  CJK Unified Ideographs Extension C, 4160 characters,
#                    all defined
#   0xF900-0xFAFF    CJK Compatibility Ideographs, duplicate / unifiable
#                    variant characters, 512 in total
#   0x2F800-0x2FA1F  CJK Compatibility Ideographs Supplement, unifiable
#                    variants, 544 in total
'''
0x4E00-0x9FFF CJK 统一字型 常用字 共 20992个(实际只定义到0x9FC3)
0x3400-0x4DFF CJK 统一字型扩展表A 少用字 共 6656个
0x20000-0x2A6DF CJK 统一字型扩展表B 少用字,历史上使用 共42720个
0x2A700-0x2B73F CJK 统一字型扩展表C 共4160个汉字,全部定义
0xF900-0xFAFF CJK 兼容字型 重复字,可统一变体,共同字 共512个
0x2F800-0x2FA1F CJK 兼容字型补遗 可统一变体 共544个
'''

# Numeric identifiers; the names match the Wubi, Cangjie, Four-Corner and
# Zhengma character coding schemes. How they are consumed is not visible here.
WUBI = 1
CANGJIE = 2
FOURCORNER = 3
ZHENGMA = 4

# NOTE(review): `dict` shadows the builtin of the same name. It is kept
# unchanged because code outside this view may refer to it.
dict = {}
# The empty string plus space, tab and newline.
spaces = ['', ' ', '\t', '\n']
hzlist = []
#!/usr/bin/env python
# coding=utf8
# Module-level setup for the '[Char]' module: imports, logger and the
# numeric language / sub-language codes.
import codecs
import string
import modules.log4py as log4py

log = log4py.log4py('[Char]')

# Language codes.
Chinese = 1
Japanese = 3
Korean = 5
Vietnamese = 6
English = 8

# Sub-language codes. In every value below the tens digit equals the code
# of the language it belongs to (e.g. Mandarin = 11 under Chinese = 1).
Mandarin = 11
Cantonese = 12
JapaneseOn = 31
JapaneseKun = 32
Hangul = 51
Korean_roman = 52
Us_en = 81
Eng_en = 82
#!/usr/bin/env python # -*- coding: UTF-8 -*- # author ablozhou 周海汉 # [email protected] # http://blog.csdn.net/ablo_zhou # 2010.3.14 import chardet import urllib import modules.log4py as log4py import sys log = log4py.log4py('[encdet]') def detect(filename, text = '', encoding = None): defaultenc = sys.getfilesystemencoding() begin = 0 if not text: try: text = file(filename,'rb').read(1000) except: log.tracebk() encoding = defaultenc return defaultenc if text.startswith('\xEF\xBB\xBF'): begin = 3 encoding = 'UTF-8' elif text.startswith('\xFF\xFE'): begin = 2 encoding = 'UTF-16'
#!/usr/bin/env python
# -*- coding: utf8 -*-
# Author: ablozhou
# E-mail: [email protected]
#
# Copyright 2010 ablozhou

import codecs
import string
import modules.log4py as log4py

log = log4py.log4py("[phonetic]")

# Language code.
Chinese = 1

# Sub-language codes.
Mandarin = 11
Cantonese = 12


class Phonetic:
    """Process phonetic symbols of Chinese characters or other languages."""

    def __init__(
        self,
        phonetic,
        sublang=Mandarin,
        language=Chinese,
        complete=False,
    ):
        # Store the settings on the instance first (same left-to-right order
        # as separate assignments), then pass all four arguments on to
        # setphonetic(), which is defined outside the visible part of the class.
        self.language, self.sublang, self.complete = language, sublang, complete
        self.setphonetic(phonetic, sublang, language, complete)
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Modification history
# date          author      notes
# 2010.1.26     ablozhou    release 0.5 OS:ubuntu 9.10 python:2.6.2
#

import codecs
import string
import modules.log4py as log4py

log = log4py.log4py('[Char]')

# Language codes.
Chinese = 1
Japanese = 3
Korean = 5
Vietnamese = 6
English = 8

# Sub-language codes; the tens digit of each value equals the code of the
# language it belongs to.
Mandarin = 11
Cantonese = 12
JapaneseOn = 31
JapaneseKun = 32
#!/usr/bin/env python # -*- coding: UTF-8 -*- import wx import os import mainui_xrc import wx.xrc as xrc import modules.group as group import modules.log4py as log4py import string import codecs import tools.encdet as encdet log = log4py.log4py('[abcframe]') class abcframe(mainui_xrc.xrcmframe): def __init__(self,parent): mainui_xrc.xrcmframe.__init__(self,parent) self.txtmain = xrc.XRCCTRL(self, "txtmain") self.tree = xrc.XRCCTRL(self, "tree") il = wx.ImageList(16,16) self.fldridx = il.Add( wx.ArtProvider.GetBitmap(wx.ART_FOLDER, wx.ART_OTHER, (16,16))) self.fldropenidx = il.Add( wx.ArtProvider.GetBitmap(wx.ART_FILE_OPEN, wx.ART_OTHER, (16,16))) self.fileidx = il.Add( wx.ArtProvider.GetBitmap(wx.ART_NORMAL_FILE,
#!/usr/bin/env python # -*- coding: UTF-8 -*- import wx import os import abcxrc import wx.xrc as xrc import modules.group as group import modules.log4py as log4py import string import codecs import tools.encdet as encdet log = log4py.log4py("[abclfrm]") class abclfrm(abcxrc.xrcmframe): def __init__(self, parent): abcxrc.xrcmframe.__init__(self, parent) self.txtmain = xrc.XRCCTRL(self, "txtmain") self.tree = xrc.XRCCTRL(self, "tree") il = wx.ImageList(16, 16) self.fldridx = il.Add(wx.ArtProvider.GetBitmap(wx.ART_FOLDER, wx.ART_OTHER, (16, 16))) self.fldropenidx = il.Add(wx.ArtProvider.GetBitmap(wx.ART_FILE_OPEN, wx.ART_OTHER, (16, 16))) self.fileidx = il.Add(wx.ArtProvider.GetBitmap(wx.ART_NORMAL_FILE, wx.ART_OTHER, (16, 16))) self.tree.AssignImageList(il) self.root = self.tree.AddRoot("root") filename = "../data/hz.txt"
import sys import wx import mainui_xrc import wx.xrc as xrc import modules.log4py as log4py import string import procdict import cPickle as pk import config import gettext import i18n import modules.data as data log = log4py.log4py('[hzdqframe]') encoding = sys.getfilesystemencoding() idxfile = './data/hzidx.dat' datafile = './data/unihan.dat' class hzdqframe(mainui_xrc.xrcmframe): def __init__(self,parent): #config file conf = config.Configure('hzdq.ini') lang = conf.getlocale() #i18n i18n.install(self, 'lang', lang)
#!/bin/env python # -*- coding: utf8 -*- import wx import mainui_xrc import wx.xrc as xrc # import modules.group as group import modules.log4py as log4py import string import procdict import cPickle as pk log = log4py.log4py("[hzdqframe]") class hzdqframe(mainui_xrc.xrcmframe): def __init__(self, parent): mainui_xrc.xrcmframe.__init__(self, parent) self.txtmain = xrc.XRCCTRL(self, "txtmain") self.txtsearch = xrc.XRCCTRL(self, "txtsearch") self.txtsearch.SetValue("中") s = self.txtsearch.GetValue().encode("utf8") f = file("../../data/hzidx.dat", "rb") self.hzidx = pk.load(f) f.close() # self.procdict = procdict.procdict('../../data/unihan.zip','blog.csdn.net/ablo_zhou') # self.unihan = self.procdict.dicttxt