Ejemplo n.º 1
0
#!/bin/env python
#coding:utf8

import modules.log4py as log4py
import modules.char as char
import codecs
import string
import cPickle as pk
import sys
import traceback

# Module-level log4py instance created with the tag '[unihan]'.
log = log4py.log4py('[unihan]')
# Relative path of the input data file (presumably the Unihan data this
# script reads -- confirm against the code that opens it).
file_src = '../data/Unihandata.txt'
# Relative path of the output file (presumably written by this script --
# confirm against the code that opens it).
file_write='../data/unihan3.txt'
'''
0x4E00-0x9FFF       CJK 统一字型               常用字          共 20992个(实际只定义到0x9FC3)
0x3400-0x4DFF       CJK 统一字型扩展表A     少用字   共 6656个
0x20000-0x2A6DF   CJK 统一字型扩展表B  少用字,历史上使用  共42720个
0x2A700-0x2B73F   CJK 统一字型扩展表C    共4160个汉字,全部定义
0xF900-0xFAFF       CJK 兼容字型        重复字,可统一变体,共同字  共512个
0x2F800-0x2FA1F   CJK 兼容字型补遗    可统一变体 共544个
'''
# Numeric identifiers; judging by the names they stand for the Wubi,
# Cangjie, four-corner and Zhengma codes -- their use is not visible here.
WUBI = 1
CANGJIE = 2
FOURCORNER = 3
ZHENGMA = 4

# NOTE(review): this binding shadows the builtin ``dict`` for the rest of
# the module; the name is kept because other code may refer to it.
dict = {}
# The empty string plus space, tab and newline (presumably the tokens that
# are treated as blank -- confirm against the code that reads this list).
spaces = ['', ' ', '\t', '\n']
# Starts empty; whatever fills it is outside this excerpt.
hzlist = []
Ejemplo n.º 2
0
#!/usr/bin/env python
# coding=utf8

#from memory import *
import codecs
import string
import modules.log4py as log4py

log = log4py.log4py('[Char]')


# Language identifiers.
Chinese = 1
Japanese = 3
Korean = 5
Vietnamese = 6
English = 8

# Sub-language identifiers. Each value is a language id from the list above
# followed by one more digit (e.g. Mandarin = 11 under Chinese = 1).
Mandarin = 11
Cantonese = 12
JapaneseOn = 31
JapaneseKun = 32
Hangul = 51
Korean_roman = 52
Us_en = 81
Eng_en = 82
Ejemplo n.º 3
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# author ablozhou 周海汉
# [email protected]
# http://blog.csdn.net/ablo_zhou
# 2010.3.14

import chardet
import urllib
import modules.log4py as log4py
import sys
log = log4py.log4py('[encdet]')

def detect(filename, text = '', encoding = None):
    """Start detecting the encoding of *text*, or of the file *filename*.

    filename -- path of the file to sample when *text* is empty
    text     -- data to inspect; when empty, up to the first 1000 bytes of
                *filename* are read in binary mode and used instead
    encoding -- initial encoding value; overwritten when a byte-order mark
                is recognised

    If the file cannot be read, the failure is reported through
    ``log.tracebk()`` and the file-system encoding is returned.

    The sample is then checked for a leading byte-order mark:
    ``EF BB BF`` selects 'UTF-8' and ``FF FE`` selects 'UTF-16'; ``begin``
    records the length of the recognised prefix (3 or 2).

    NOTE(review): only the beginning of this function is visible in this
    excerpt; how ``begin`` and ``encoding`` are used afterwards, and what
    is returned on the normal path, is not shown here.
    """

    defaultenc = sys.getfilesystemencoding()
    begin = 0
    if not text:
        try:
            # The context manager closes the handle even if read() raises.
            with open(filename,'rb') as src:
                text = src.read(1000)
        except Exception:
            # Still best-effort (any read problem falls back to the default
            # encoding), but KeyboardInterrupt/SystemExit now propagate.
            log.tracebk()
            return defaultenc

    if text.startswith('\xEF\xBB\xBF'):
        begin = 3
        encoding = 'UTF-8'
    elif text.startswith('\xFF\xFE'):
        begin = 2
        encoding = 'UTF-16'
Ejemplo n.º 4
0
#!/usr/bin/env python
# -*- coding: utf8 -*-
#   Author:        ablozhou
#   E-mail:        [email protected]
#
#   Copyright 2010 ablozhou
import codecs
import string
import modules.log4py as log4py

log = log4py.log4py("[phonetic]")


# Language identifier; used as the default ``language`` of Phonetic.__init__.
Chinese = 1

# Sub-language identifiers; Mandarin is the default ``sublang`` of
# Phonetic.__init__.
Mandarin = 11
Cantonese = 12


class Phonetic:
    """ Process phonetic symbols of Chinese characters
    or other languages.
    """

    def __init__(self, phonetic, sublang=Mandarin, language=Chinese, complete=False):
        """Store the language settings and pass *phonetic* to ``setphonetic``.

        phonetic -- value forwarded unchanged to ``self.setphonetic``
        sublang  -- sub-language id, defaults to ``Mandarin``
        language -- language id, defaults to ``Chinese``
        complete -- flag stored on the instance and forwarded; its meaning
                    is defined by ``setphonetic``, which lies outside this
                    excerpt
        """
        self.language = language
        self.sublang = sublang
        self.complete = complete
        # The attributes above are set before this call, so setphonetic can
        # rely on them being present.
        self.setphonetic(phonetic, sublang, language, complete)
Ejemplo n.º 5
0
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#   modify history
#   date          author    notes
#   2010.1.26    ablozhou   release 0.5 OS:ubuntu 9.10 python:2.6.2
#


import codecs
import string
import modules.log4py as log4py

log = log4py.log4py('[Char]')


# Language identifiers.
Chinese = 1
Japanese = 3
Korean = 5
Vietnamese = 6
English = 8

# Sub-language identifiers. Each value is a language id from the list above
# followed by one more digit (e.g. Mandarin = 11 under Chinese = 1).
Mandarin = 11
Cantonese = 12
JapaneseOn = 31
JapaneseKun = 32
Ejemplo n.º 6
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import wx
import os
import mainui_xrc
import wx.xrc as xrc
import modules.group as group
import modules.log4py as log4py
import string
import codecs
import tools.encdet as encdet

log = log4py.log4py('[abcframe]')

class abcframe(mainui_xrc.xrcmframe):
    def __init__(self,parent):

        mainui_xrc.xrcmframe.__init__(self,parent)

        self.txtmain = xrc.XRCCTRL(self, "txtmain")
        self.tree = xrc.XRCCTRL(self, "tree")
        il = wx.ImageList(16,16)
        self.fldridx = il.Add(
            wx.ArtProvider.GetBitmap(wx.ART_FOLDER,
                    wx.ART_OTHER, (16,16)))
        self.fldropenidx = il.Add(
            wx.ArtProvider.GetBitmap(wx.ART_FILE_OPEN,
                    wx.ART_OTHER, (16,16)))
        self.fileidx = il.Add(
            wx.ArtProvider.GetBitmap(wx.ART_NORMAL_FILE,
Ejemplo n.º 7
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import wx
import os
import abcxrc
import wx.xrc as xrc
import modules.group as group
import modules.log4py as log4py
import string
import codecs
import tools.encdet as encdet

log = log4py.log4py("[abclfrm]")


class abclfrm(abcxrc.xrcmframe):
    """Frame derived from ``abcxrc.xrcmframe`` with a text control and a tree.

    NOTE(review): only the beginning of ``__init__`` is visible in this
    excerpt; the comments below cover the visible statements only.
    """

    def __init__(self, parent):
        """Initialise the base frame, look up the controls and set up the tree.

        parent -- passed straight through to ``abcxrc.xrcmframe.__init__``
        """

        abcxrc.xrcmframe.__init__(self, parent)

        # Controls are looked up by their XRC names.
        self.txtmain = xrc.XRCCTRL(self, "txtmain")
        self.tree = xrc.XRCCTRL(self, "tree")
        # 16x16 image list holding three stock ArtProvider bitmaps (folder,
        # file-open, normal file); the index returned by each Add() is kept
        # on the instance.
        il = wx.ImageList(16, 16)
        self.fldridx = il.Add(wx.ArtProvider.GetBitmap(wx.ART_FOLDER, wx.ART_OTHER, (16, 16)))
        self.fldropenidx = il.Add(wx.ArtProvider.GetBitmap(wx.ART_FILE_OPEN, wx.ART_OTHER, (16, 16)))
        self.fileidx = il.Add(wx.ArtProvider.GetBitmap(wx.ART_NORMAL_FILE, wx.ART_OTHER, (16, 16)))

        # Hand the image list to the tree and create its root item.
        self.tree.AssignImageList(il)
        self.root = self.tree.AddRoot("root")
        # Relative path of a data file; how it is used is outside this excerpt.
        filename = "../data/hz.txt"
Ejemplo n.º 8
0
import sys
import wx
import mainui_xrc
import wx.xrc as xrc

import modules.log4py as log4py
import string
import procdict
import cPickle as pk
import config
import gettext
import i18n
import modules.data as data

# Module-level log4py instance created with the tag '[hzdqframe]'.
log = log4py.log4py('[hzdqframe]')

# File-system encoding as reported by the interpreter at import time.
encoding = sys.getfilesystemencoding()

# Relative paths of two data files (judging by the names, a character index
# and the Unihan data -- how they are loaded is not visible here).
idxfile = './data/hzidx.dat'
datafile = './data/unihan.dat'


class hzdqframe(mainui_xrc.xrcmframe):
    """Frame class derived from ``mainui_xrc.xrcmframe``.

    NOTE(review): only the beginning of ``__init__`` is visible in this
    excerpt; the comments below cover the visible statements only.
    """
    def __init__(self,parent):
        """Read the locale from 'hzdq.ini' and install i18n with it."""
        # Open the configuration file 'hzdq.ini' and ask it for the locale.
        conf = config.Configure('hzdq.ini')
        lang = conf.getlocale()

        # Install i18n on this frame for that locale; 'lang' is presumably
        # the translation domain or directory -- confirm in i18n.install.
        i18n.install(self, 'lang', lang)
Ejemplo n.º 9
0
#!/bin/env python
# -*- coding: utf8 -*-

import wx
import mainui_xrc
import wx.xrc as xrc

# import modules.group as group
import modules.log4py as log4py
import string
import procdict
import cPickle as pk

log = log4py.log4py("[hzdqframe]")


class hzdqframe(mainui_xrc.xrcmframe):
    def __init__(self, parent):

        mainui_xrc.xrcmframe.__init__(self, parent)

        self.txtmain = xrc.XRCCTRL(self, "txtmain")

        self.txtsearch = xrc.XRCCTRL(self, "txtsearch")
        self.txtsearch.SetValue("中")
        s = self.txtsearch.GetValue().encode("utf8")
        f = file("../../data/hzidx.dat", "rb")
        self.hzidx = pk.load(f)
        f.close()
        # self.procdict = procdict.procdict('../../data/unihan.zip','blog.csdn.net/ablo_zhou')
        # self.unihan = self.procdict.dicttxt