Beispiel #1
0
 def __create_engine_cb(self, factory, engine_name):
     '''
     Create the ibus-table engine under test for the requested name.

     Returns None when engine_name differs from ENGINE_NAME, or (after
     recording a test failure and quitting the Gtk main loop) when one
     of the two import flags is false. Otherwise a TabEngine backed by
     the system table and an in-memory user database is created,
     connected to the focus/reset handlers of this test case, and
     returned.
     '''
     if engine_name != ENGINE_NAME:
         return None
     if not (IMPORT_TABLE_SUCCESSFUL and IMPORT_TABSQLITEDB_SUCCESSFUL):
         # The failure is recorded inside a sub-test, so execution
         # continues here and the main loop can still be stopped.
         with self.subTest(i='create-engine'):
             self.fail('NG: ibus-table not installed?')
         Gtk.main_quit()
         return None
     self.__id += 1
     object_path = '%s/%d' % (self.ENGINE_PATH, self.__id)
     db_file = os.path.join(
         '/usr/share/ibus-table/tables', engine_name + '.db')
     database = tabsqlitedb.TabSqliteDb(
         filename=db_file, user_db=':memory:', unit_test=True)
     engine = table.TabEngine(self.__bus, object_path, database)
     self.__engine = engine
     engine.connect('focus-in', self.__engine_focus_in)
     engine.connect('focus-out', self.__engine_focus_out)
     # FIXME: 'reset' has to be connected so that the handler runs after
     # TabEngine.clear_all_input_and_preedit() has been called.
     engine.connect_after('reset', self.__engine_reset)
     connection = self.__bus.get_connection()
     connection.signal_subscribe(
         None,
         IBus.INTERFACE_ENGINE,
         'UpdateLookupTable',
         object_path,
         None,
         0,
         self.__bus_signal_cb,
         self.__bus)
     return engine
Beispiel #2
0
 def do_create_engine(self, engine_name):
     '''
     Return the engine for engine_name, creating it on first use.

     Databases and engines are cached per engine name in self.dbdict
     and self.enginedict; self.engine_id is incremented only when a
     new engine object is created.

     :param engine_name: name of the engine requested by ibus
     :return: the (possibly cached) TypingBoosterEngine
     :raises Exception: when the database or the engine cannot be
                        created; the original error is chained as
                        the cause
     '''
     if DEBUG_LEVEL > 1:
         sys.stderr.write(
             "EngineFactory.do_create_engine(engine_name=%s)\n" %
             engine_name)
     engine_base_path = "/com/redhat/IBus/engines/table/%s/engine/"
     # Characters outside [a-zA-Z0-9_/] are replaced by “_” before the
     # name is inserted into the object path.
     engine_path = engine_base_path % re.sub(r'[^a-zA-Z0-9_/]', '_',
                                             engine_name)
     try:
         if engine_name in self.dbdict:
             self.db = self.dbdict[engine_name]
         else:
             self.db = tabsqlitedb.TabSqliteDb()
             self.dbdict[engine_name] = self.db
         if engine_name in self.enginedict:
             engine = self.enginedict[engine_name]
         else:
             engine = hunspell_table.TypingBoosterEngine(
                 self.bus, engine_path + str(self.engine_id), self.db)
             self.enginedict[engine_name] = engine
             self.engine_id += 1
         return engine
     except Exception as error:
         # Only real errors are converted; KeyboardInterrupt and
         # SystemExit are allowed to propagate unchanged.
         print("failed to create engine %s" % engine_name)
         import traceback
         traceback.print_exc()
         raise Exception(
             "Cannot create engine %s" % engine_name) from error
Beispiel #3
0
    def __init__(self, bus, db=''):
        '''
        Set up the engine factory on the connection of the given bus.

        The module level DEBUG_LEVEL is (re)initialised from the
        environment variable IBUS_TABLE_DEBUG_LEVEL; a missing or
        non-integer value yields 0.

        :param bus: bus object providing get_connection()
        :param db: full path to the sql database; when empty no
                   database is opened here
        '''
        global DEBUG_LEVEL
        debug_level_setting = os.getenv('IBUS_TABLE_DEBUG_LEVEL')
        try:
            DEBUG_LEVEL = int(debug_level_setting)
        except (TypeError, ValueError):
            DEBUG_LEVEL = 0
        if DEBUG_LEVEL > 1:
            LOGGER.debug('EngineFactory.__init__(bus=%s, db=%s)\n', bus, db)
        if not db:
            self.db = None
            self.dbdict = {}
        else:
            # Both the engine name and the name of the user database
            # are derived from the file name of the table database.
            db_basename = os.path.basename(db)
            self.dbusname = db_basename.replace('.db', '')
            user_db_name = db_basename.replace('.db', '-user.db')
            self.db = tabsqlitedb.TabSqliteDb(
                filename=db, user_db=user_db_name)
            self.db.db.commit()
            self.dbdict = {self.dbusname: self.db}

        # init factory
        self.bus = bus
        super(EngineFactory, self).__init__(
            connection=bus.get_connection(),
            object_path=IBus.PATH_FACTORY)
        self.engine_id = 0
        self.engine_path = ''
Beispiel #4
0
 def init_database(
         self,
         user_db_file: str = ':memory:',
         dictionary_names: Iterable[str] = ('en_US',)) -> None:
     '''
     Open the database and tell its hunspell object which
     dictionaries to use.

     :param user_db_file: passed through to TabSqliteDb as
                          user_db_file
     :param dictionary_names: dictionary names; converted to a list
                              before being handed to hunspell_obj
     '''
     self.database = tabsqlitedb.TabSqliteDb(user_db_file=user_db_file)
     hunspell = self.database.hunspell_obj
     hunspell.set_dictionary_names([*dictionary_names])
 def do_create_engine(
         self, engine_name: str) -> hunspell_table.TypingBoosterEngine:
     '''
     Return the engine for engine_name, creating it on first use.

     Databases are cached in self.database_dict and engines in
     self.enginedict, both keyed by engine name. Any error is printed
     together with its traceback and re-raised as a plain Exception
     chained to the original error.
     '''
     if DEBUG_LEVEL > 1:
         LOGGER.debug('EngineFactory.do_create_engine(engine_name=%s)\n',
                      engine_name)
     sanitized_name = re.sub(r'[^a-zA-Z0-9_/]', '_', engine_name)
     engine_path = (
         "/com/redhat/IBus/engines/table/%s/engine/" % sanitized_name)
     try:
         if engine_name not in self.database_dict:
             self.database = tabsqlitedb.TabSqliteDb()
             self.database_dict[engine_name] = self.database
         else:
             self.database = self.database_dict[engine_name]
         if engine_name not in self.enginedict:
             engine = hunspell_table.TypingBoosterEngine(
                 self.bus, engine_path + str(self.engine_id), self.database)
             self.enginedict[engine_name] = engine
             self.engine_id += 1
         else:
             engine = self.enginedict[engine_name]
         return engine
     except Exception as error:
         error_kind = error.__class__.__name__
         print(f'failed to create engine {engine_name}: '
               f'{error_kind}: {error}')
         import traceback
         traceback.print_exc()
         raise Exception from error
Beispiel #6
0
    def do_create_engine(self, engine_name):
        '''
        Create a new TabEngine for the table called engine_name.

        A leading “table:” is removed from engine_name. When the
        factory was created without a database (self.db is false) and
        no database is cached for the engine yet, “<engine_name>.db”
        is looked up in “$IBUS_TABLE_LOCATION/tables” (or in
        /usr/share/ibus-table/tables when that variable is unset) and,
        if no such file exists, in “~/.ibus/byo-tables”.

        :param engine_name: engine name, optionally prefixed “table:”
        :return: the newly created TabEngine
        :raises Exception: when the database or the engine cannot be
                           created; the original error is chained as
                           the cause
        '''
        engine_name = re.sub(r'^table:', '', engine_name)
        engine_base_path = "/com/redhat/IBus/engines/table/%s/engine/"
        path_patt = re.compile(r'[^a-zA-Z0-9_/]')
        self.engine_path = engine_base_path % path_patt.sub('_', engine_name)
        try:
            if not self.db and engine_name not in self.dbdict:
                table_location = os.getenv('IBUS_TABLE_LOCATION')
                if table_location is None:
                    db_dir = "/usr/share/ibus-table/tables"
                else:
                    db_dir = os.path.join(table_location, 'tables')
                db = os.path.join(db_dir, engine_name + '.db')
                udb = engine_name + '-user.db'
                if not os.path.exists(db):
                    # expanduser() still finds the home directory when
                    # $HOME is not set in the environment.
                    byo_db_dir = os.path.join(
                        os.path.expanduser('~'), '.ibus/byo-tables')
                    db = os.path.join(byo_db_dir, engine_name + '.db')
                _sq_db = tabsqlitedb.TabSqliteDb(filename=db, user_db=udb)
                _sq_db.db.commit()
                self.dbdict[engine_name] = _sq_db

            engine = table.TabEngine(self.bus,
                                     self.engine_path + str(self.engine_id),
                                     self.dbdict[engine_name])
            self.engine_id += 1
            return engine
        except Exception as error:
            # Only real errors are converted; KeyboardInterrupt and
            # SystemExit are allowed to propagate unchanged.
            print("failed to create engine %s" %engine_name)
            import traceback
            traceback.print_exc()
            raise Exception(
                "Cannot create engine %s" %engine_name) from error
 def __create_engine_cb(self, factory, engine_name):
     '''
     Create the typing-booster engine under test.

     Returns None when engine_name is not 'testTyping-booster', or
     (after recording a test failure and quitting the Gtk main loop)
     when one of the two import flags is false. Otherwise a
     TypingBoosterEngine with an in-memory user database is created,
     connected to the focus/reset handlers of this test case, and
     returned.
     '''
     if engine_name != 'testTyping-booster':
         return None
     if not (IMPORT_HUNSPELL_SUCCESSFUL and IMPORT_TABSQLITEDB_SUCCESSFUL):
         # The failure is recorded inside a sub-test, so execution
         # continues here and the main loop can still be stopped.
         with self.subTest(i='create-engine'):
             self.fail('NG: ibus-typing-booster not installed?')
         Gtk.main_quit()
         return None
     self.__id += 1
     object_path = '%s/%d' % (self.ENGINE_PATH, self.__id)
     database = tabsqlitedb.TabSqliteDb(user_db_file=':memory:')
     engine = hunspell_table.TypingBoosterEngine(
         self.__bus, object_path, database)
     self.__engine = engine
     engine.connect('focus-in', self.__engine_focus_in)
     engine.connect('focus-out', self.__engine_focus_out)
     # 'reset' has to be connected so that the handler runs after
     # TypingBoosterEngine._clear_input() has been called.
     engine.connect_after('reset', self.__engine_reset)
     connection = self.__bus.get_connection()
     connection.signal_subscribe(
         None,
         IBus.INTERFACE_ENGINE,
         'UpdateLookupTable',
         object_path,
         None,
         0,
         self.__bus_signal_cb,
         self.__bus)
     return engine
Beispiel #8
0
 def setUp(self):
     '''
     Create a bus, an in-memory database and a TypingBoosterEngine in
     unit test mode, then back up the current settings and switch to
     the default settings.
     '''
     engine_object_path = (
         '/com/redhat/IBus/engines/table/typing_booster/engine/0')
     self.bus = IBus.Bus()
     self.db = tabsqlitedb.TabSqliteDb(user_db_file=':memory:')
     self.engine = TypingBoosterEngine(
         self.bus, engine_object_path, self.db, unit_test=True)
     self.backup_original_settings()
     self.set_default_settings()
Beispiel #9
0
def set_up(engine_name):
    '''
    Create the module level TABSQLITEDB and ENGINE for engine_name.

    The table is read from /usr/share/ibus-table/tables and the user
    database is kept in memory. Afterwards the current settings are
    backed up and the default settings are applied.
    '''
    global TABSQLITEDB, ENGINE
    bus = IBus.Bus()
    db_file = os.path.join(
        '/usr/share/ibus-table/tables', engine_name + '.db')
    TABSQLITEDB = tabsqlitedb.TabSqliteDb(
        filename=db_file, user_db=':memory:')
    object_path = (
        '/com/redhat/IBus/engines/table/%s/engine/0' % engine_name)
    ENGINE = TabEngine(bus, object_path, TABSQLITEDB, unit_test=True)
    backup_original_settings()
    set_default_settings()
Beispiel #10
0
    def __init__(self, bus, db=''):
        '''
        Set up the engine factory on the connection of the given bus.

        :param bus: bus object providing get_connection()
        :param db: full path to the sql database; when empty no
                   database is opened here
        '''
        if not db:
            self.db = None
            self.dbdict = {}
        else:
            # Both the engine name and the name of the user database
            # are derived from the file name of the table database.
            db_basename = os.path.basename(db)
            self.dbusname = db_basename.replace('.db', '')
            user_db_name = db_basename.replace('.db', '-user.db')
            self.db = tabsqlitedb.TabSqliteDb(
                filename=db, user_db=user_db_name)
            self.db.db.commit()
            self.dbdict = {self.dbusname: self.db}

        # init factory
        self.bus = bus
        super(EngineFactory, self).__init__(
            connection=bus.get_connection(),
            object_path=IBus.PATH_FACTORY)
        self.engine_id = 0
        self.engine_path = ''
Beispiel #11
0
def main():
    '''
    Print the engines XML or run the input method application.

    With _OPTIONS.xml set, every “*.db” file in DB_DIR and (when that
    directory can be listed) in BYO_DB_DIR is described by an
    <engine> element; the document is written to stdout encoded as
    UTF-8 and 0 is returned.

    Otherwise the process forks when _OPTIONS.daemon is set (the
    parent exits), the database path is resolved from _OPTIONS.db,
    and an IMApp is run with SIGTERM/SIGINT routed to cleanup().
    '''
    if _OPTIONS.xml:
        from locale import getdefaultlocale
        from xml.etree.ElementTree import Element, SubElement, tostring

        # System tables first, then the “bring your own” tables.
        db_paths = [
            os.path.join(DB_DIR, file_name)
            for file_name in os.listdir(DB_DIR)
            if file_name.endswith('.db')]
        try:
            db_paths.extend(
                os.path.join(BYO_DB_DIR, file_name)
                for file_name in os.listdir(BYO_DB_DIR)
                if file_name.endswith('.db'))
        except OSError:
            # BYO_DB_DIR does not exist or is not accessible
            pass

        engines_elem = Element('engines')
        for db_path in db_paths:
            properties = tabsqlitedb.TabSqliteDb(
                db_path, user_db=None).ime_properties
            engine_elem = SubElement(engines_elem, 'engine')

            engine_name = os.path.basename(db_path).replace('.db', '')
            name_elem = SubElement(engine_elem, 'name')
            name_elem.text = 'table:'+engine_name
            setup_arg = "{} --engine-name {}".format(
                SETUP_CMD, name_elem.text)

            longname_elem = SubElement(engine_elem, 'longname')
            # getdefaultlocale() returns something like ('ja_JP', 'UTF-8').
            # In case of C/POSIX locale it returns (None, None)
            locale_name = getdefaultlocale()[0]
            locale_name = locale_name.lower() if locale_name else 'en'
            language_only = locale_name.split('_')[0]
            # Most specific name first; fall back to the file name.
            for name_key in ('name.' + locale_name,
                             'name.' + language_only,
                             'name'):
                longname = properties.get(name_key)
                if longname:
                    break
            else:
                longname = engine_name
            longname_elem.text = longname

            language_elem = SubElement(engine_elem, 'language')
            languages_csv = properties.get('languages')
            if languages_csv:
                languages = languages_csv.split(',')
                first_language = languages[0].strip()
                if len(languages) != 1:
                    # we ignore the place
                    first_language = first_language.split('_')[0]
                language_elem.text = first_language

            for tag in ('license', 'author'):
                SubElement(engine_elem, tag).text = properties.get(tag)

            icon_elem = SubElement(engine_elem, 'icon')
            icon_basename = properties.get('icon')
            if icon_basename:
                icon_elem.text = os.path.join(ICON_DIR, icon_basename)

            for tag in ('layout', 'symbol', 'description'):
                SubElement(engine_elem, tag).text = properties.get(tag)

            SubElement(engine_elem, 'setup').text = setup_arg
            SubElement(engine_elem, 'icon_prop_key').text = 'InputMode'

        # now format the xmlout pretty
        indent(engines_elem)
        xml_text = tostring(engines_elem, encoding='utf8').decode('utf-8')
        # Drop the XML declaration line.
        xml_text = re.sub(r'<\?.*\?>\n', '', xml_text)
        # Always write xml output in UTF-8 encoding, not in the
        # encoding of the current locale, otherwise it might fail
        # if conversion into the encoding of the current locale is
        # not possible:
        payload = (xml_text+'\n').encode('utf-8')
        if sys.version_info >= (3, 0, 0):
            sys.stdout.buffer.write(payload)
        else:
            sys.stdout.write(payload)
        return 0

    if _OPTIONS.daemon and os.fork():
        # Parent process: the child carries on as the daemon.
        sys.exit()

    if not _OPTIONS.db:
        db = ""
    elif os.access(_OPTIONS.db, os.F_OK):
        db = _OPTIONS.db
    else:
        # Not found as given: look for a file of that name in DB_DIR.
        db = '%s%s%s' % (DB_DIR,
                         os.path.sep,
                         os.path.basename(_OPTIONS.db))

    ima = IMApp(db, _OPTIONS.ibus)

    def terminate(signum, stack_frame):
        cleanup(ima)

    for signal_number in (SIGTERM, SIGINT):
        signal(signal_number, terminate)
    try:
        ima.run()
    except KeyboardInterrupt:
        ima.quit()
Beispiel #12
0
 def get_default_options_from_database(self):
     '''
     If there are default options in the database,
     they override the defaults from Gsettings.

     Opens the table database of the current engine (without a user
     database and without creating it) and copies the IME properties
     found there into OPTION_DEFAULTS and into a few attributes of
     this object. Properties which are missing or malformed leave the
     corresponding default untouched.
     '''
     self.tabsqlitedb = tabsqlitedb.TabSqliteDb(
         filename=os.path.join(
             DB_DIR,
             re.sub(r'^table:', '', self.__engine_name)+'.db'),
         user_db=None,
         create_database=False)

     def get_property(name):
         '''Return the raw value of the IME property called name.'''
         return self.tabsqlitedb.ime_properties.get(name)

     def get_bool_property(name):
         '''
         Return True/False for a property spelled “true”/“false”
         (in any case), None for anything else.
         '''
         value = get_property(name)
         if isinstance(value, str) and value.lower() in ('true', 'false'):
             return value.lower() == 'true'
         return None

     def set_bool_defaults(property_name, *option_names):
         '''Copy a valid boolean property into the given options.'''
         value = get_bool_property(property_name)
         if value is not None:
             for option_name in option_names:
                 OPTION_DEFAULTS[option_name] = value

     def get_first_char_property(name):
         '''Return the first character of a non-empty string property.'''
         value = get_property(name)
         if value and isinstance(value, str):
             return value[0]
         return None

     self.__is_chinese = False
     self.__is_cjk = False
     languages = get_property('languages')
     if languages:
         for language in languages.split(','):
             language = language.strip()
             if language.startswith('zh'):
                 self.__is_chinese = True
             if language.startswith(('zh', 'ja', 'ko')):
                 self.__is_cjk = True
     # The property is stored with underscores only; a key containing
     # a hyphen would never be found.
     self.__user_can_define_phrase = (
         get_bool_property('user_can_define_phrase') is True)
     self.__rules = get_property('rules')
     language_filter = get_property('language_filter')
     if language_filter in ('cm0', 'cm1', 'cm2', 'cm3', 'cm4'):
         # The trailing digit is the value of the “chinesemode” option.
         OPTION_DEFAULTS['chinesemode'] = int(language_filter[-1])
     set_bool_defaults(
         'def_full_width_punct',
         'tabdeffullwidthpunct', 'endeffullwidthpunct')
     set_bool_defaults(
         'def_full_width_letter',
         'tabdeffullwidthletter', 'endeffullwidthletter')
     set_bool_defaults('always_show_lookup', 'alwaysshowlookup')
     select_keys_csv = get_property('select_keys')
     if select_keys_csv:
         # select_keys_csv is something like: "1,2,3,4,5,6,7,8,9,0"
         OPTION_DEFAULTS['lookuptablepagesize'] = len(
             select_keys_csv.split(","))
     set_bool_defaults('auto_select', 'autoselect')
     set_bool_defaults('auto_commit', 'autocommit')
     OPTION_DEFAULTS['lookuptableorientation'] = (
         self.tabsqlitedb.get_orientation())
     # if space is a page down key, set the option
     # “spacekeybehavior” to “True”:
     page_down_keys_csv = get_property('page_down_keys')
     if page_down_keys_csv:
         self._page_down_keys = [
             IBus.keyval_from_name(x)
             for x in page_down_keys_csv.split(',')]
     # NOTE(review): when the database defines no page down keys,
     # self._page_down_keys has to be initialised elsewhere — confirm.
     if IBus.KEY_space in self._page_down_keys:
         OPTION_DEFAULTS['spacekeybehavior'] = True
     # if space is a commit key, set the option
     # “spacekeybehavior” to “False” (overrides if space is
     # also a page down key):
     commit_keys_csv = get_property('commit_keys')
     if commit_keys_csv:
         self._commit_keys = [
             IBus.keyval_from_name(x)
             for x in commit_keys_csv.split(',')]
     # NOTE(review): same assumption as for self._page_down_keys.
     if IBus.KEY_space in self._commit_keys:
         OPTION_DEFAULTS['spacekeybehavior'] = False
     set_bool_defaults('auto_wildcard', 'autowildcard')
     # Only the first character of a wildcard property is used.
     single_wildcard_char = get_first_char_property('single_wildcard_char')
     if single_wildcard_char is not None:
         OPTION_DEFAULTS['singlewildcardchar'] = single_wildcard_char
     multi_wildcard_char = get_first_char_property('multi_wildcard_char')
     if multi_wildcard_char is not None:
         OPTION_DEFAULTS['multiwildcardchar'] = multi_wildcard_char
Beispiel #13
0
def main():
    '''
    Build (or only index) a table database from its source files.

    Unless _OPTIONS.only_index is set, an existing database file
    _OPTIONS.name is removed first, then the table source
    _OPTIONS.source is parsed and its attributes and phrases are
    written to the database. Depending on the IME properties, goucima,
    pinyin (_OPTIONS.pinyin) and extra phrases (_OPTIONS.extra) are
    added as well. Finally the indexes are created or dropped
    according to _OPTIONS.index.

    Source files whose name matches “.*\\.bz2” are read with bz2; all
    sources are treated as UTF-8.

    :return: 0 when only the indexes were created, otherwise None
    '''
    def debug_print(message):
        '''Print message when debugging output was requested.'''
        if _OPTIONS.debug:
            print(message)

    if not _OPTIONS.only_index:
        try:
            os.unlink(_OPTIONS.name)
        except (OSError, TypeError):
            # No old database to remove (or no usable file name).
            pass

    debug_print('Processing Database')
    db = tabsqlitedb.TabSqliteDb(filename=_OPTIONS.name,
                                 user_db=None,
                                 create_database=True)

    def ime_flag(name):
        '''
        Return True when the IME property called name is “true”
        (in any case); a missing or empty property counts as False.
        '''
        return (db.ime_properties.get(name) or '').lower() == u'true'

    def parse_source(f):
        '''
        Sort the lines of the table source into attribute lines,
        table lines and goucima lines and return them as a tuple
        (attributes, table, goucima).
        '''
        _attri = []
        _table = []
        _gouci = []
        patt_com = re.compile(r'^###.*')
        patt_blank = re.compile(r'^[ \t]*$')
        patt_conf = re.compile(r'[^\t]*=[^\t]*')
        patt_table = re.compile(r'([^\t]+)\t([^\t]+)\t([0-9]+)(\t.*)?$')
        patt_gouci = re.compile(r' *[^\s]+ *\t *[^\s]+ *$')

        for line in f:
            if patt_com.match(line) or patt_blank.match(line):
                continue
            # The first matching pattern decides the kind of the line.
            for _patt, _list in ((patt_table, _table),
                                 (patt_gouci, _gouci),
                                 (patt_conf, _attri)):
                if _patt.match(line):
                    _list.append(line)
                    break

        if not _gouci:
            # The user didn’t provide goucima (goucima = 構詞碼 =
            # “word formation keys”) in the table source, so we use
            # the longest encoding for a single character as the
            # goucima for that character.
            #
            # Example:
            #
            # wubi-jidian86.txt contains:
            #
            #     a         工      99454797
            #     aaa       工      551000000
            #     aaaa      工      551000000
            #     aaad      工期    5350000
            #     ... and more matches for compounds containing 工
            #
            # The longest key sequence to type 工 as a single
            # character is “aaaa”.  Therefore, the goucima of 工 is
            # “aaaa” (There is one other character with the same goucima
            # in  wubi-jidian86.txt, 㠭 also has the goucima “aaaa”).
            gouci_dict = {}
            for line in _table:
                res = patt_table.match(line)
                if res and len(res.group(2)) == 1:
                    tabkeys, char = res.group(1), res.group(2)
                    if (char not in gouci_dict
                            or len(tabkeys) > len(gouci_dict[char])):
                        gouci_dict[char] = tabkeys
            _gouci.extend(
                '%s\t%s' % (char, tabkeys)
                for char, tabkeys in gouci_dict.items())
            _gouci.sort()

        return (_attri, _table, _gouci)

    def parse_pinyin(f):
        '''
        Return one “character<TAB>pinyin<TAB>frequency” line for each
        pinyin syllable found in the second column of the lines of f.
        '''
        _pinyins = []
        patt_com = re.compile(r'^#.*')
        patt_blank = re.compile(r'^[ \t]*$')
        patt_py = re.compile(r'(.*)\t(.*)\t(.*)')
        patt_yin = re.compile(r'[a-z]+[1-5]')

        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if patt_com.match(line) or patt_blank.match(line):
                continue
            res = patt_py.match(line)
            if res:
                for yin in patt_yin.findall(res.group(2)):
                    _pinyins.append(
                        "%s\t%s\t%s" % (res.group(1), yin, res.group(3)))
        return _pinyins

    def parse_extra(f):
        '''Return the lines of f which look like “phrase<TAB>freq”.'''
        _extra = []
        patt_com = re.compile(r'^###.*')
        patt_blank = re.compile(r'^[ \t]*$')
        patt_extra = re.compile(r'(.*)\t(.*)')

        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if patt_com.match(line) or patt_blank.match(line):
                continue
            if patt_extra.match(line):
                _extra.append(line)

        return _extra

    def pinyin_parser(f):
        '''Yield (pinyin, character, frequency) for each line of f.'''
        for pinyin_line in f:
            if not isinstance(pinyin_line, str):
                pinyin_line = pinyin_line.decode('utf-8')
            _zi, _pinyin, _freq = pinyin_line.strip().split()
            yield (_pinyin, _zi, _freq)

    def phrase_parser(f):
        '''
        Return a list of (tabkeys, phrase, frequency, 0) tuples, one
        for each table line in f. The phrase “NOSYMBOL” is stored as
        an empty phrase.
        '''
        phrase_list = []
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            xingma, phrase, freq = line.split('\t')[:3]
            if phrase == 'NOSYMBOL':
                phrase = u''
            phrase_list.append((xingma, phrase, int(freq), 0))
        return phrase_list

    def goucima_parser(f):
        '''Yield (character, goucima) for each line of f.'''
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            zi, gcm = line.strip().split()
            yield (zi, gcm)

    def attribute_parser(f):
        '''
        Yield (attribute, value) for each “attribute = value” line of
        f; the attribute name is converted to lower case.
        '''
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            try:
                attr, val = line.strip().split('=')
            except ValueError:
                # More than one “=” in the line: retry with “==” as
                # the separator.
                attr, val = line.strip().split('==')
            attr = attr.strip().lower()
            val = val.strip()
            yield (attr, val)

    def extra_parser(f):
        '''
        Return a list of (tabkeys, phrase, frequency, 0) tuples for
        the extra phrases in f for which the database can compute
        tabkeys; other phrases are reported and skipped.
        '''
        extra_list = []
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            phrase, freq = line.strip().split()
            _tabkey = db.parse_phrase(phrase)
            if _tabkey:
                extra_list.append((_tabkey, phrase, freq, 0))
            else:
                print('No tabkeys found for “%s”, not adding.\n' % phrase)
        return extra_list

    def get_char_prompts(f):
        '''
        Returns something like

        ("char_prompts", "{'a': '日', 'b': '日', 'c': '金', ...}")

        i.e. the attribute name "char_prompts" and as its value
        the string representation of a Python dictionary.
        '''
        char_prompts = {}
        start = False
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if re.match(r'^BEGIN_CHAR_PROMPTS_DEFINITION', line):
                start = True
                continue
            if not start:
                continue
            if re.match(r'^END_CHAR_PROMPTS_DEFINITION', line):
                break
            match = re.search(r'^(?P<char>[^\s]+)[\s]+(?P<prompt>[^\s]+)',
                              line)
            if match:
                char_prompts[match.group('char')] = match.group('prompt')
        return ("char_prompts", repr(char_prompts))

    if _OPTIONS.only_index:
        debug_print('Only create Indexes')
        debug_print('Optimizing database ')
        db.optimize_database()

        debug_print('Create Indexes ')
        db.create_indexes('main')
        debug_print('Done! :D')
        return 0

    patt_s = re.compile(r'.*\.bz2')

    def open_source(path):
        '''
        Open a source file for reading: bz2 compressed (yielding
        bytes) when the name matches patt_s, UTF-8 text otherwise.
        '''
        if patt_s.match(path):
            return bz2.BZ2File(path, "r")
        return open(path, mode='r', encoding='UTF-8')

    # now we parse the ime source file
    debug_print('\tLoad sources "%s"' % _OPTIONS.source)
    with open_source(_OPTIONS.source) as source_file:
        source = source_file.read()
    if not isinstance(source, str):
        # bz2 delivers bytes; the str operations below need text.
        source = source.decode('utf-8')
    source = source.replace('\r\n', '\n')
    source = source.split('\n')
    # first get config line and table line and goucima line respectively
    debug_print('\tParsing table source file ')
    attri, table, gouci = parse_source(source)

    debug_print('\t  get attribute of IME :)')
    attributes = list(attribute_parser(attri))
    attributes.append(get_char_prompts(source))
    debug_print('\t  add attributes into DB ')
    db.update_ime(attributes)
    db.create_tables('main')

    # second, we use generators for database generating:
    debug_print('\t  get phrases of IME :)')
    phrases = phrase_parser(table)

    # now we add things into db
    debug_print('\t  add phrases into DB ')
    db.add_phrases(phrases)

    if ime_flag('user_can_define_phrase'):
        debug_print('\t  get goucima of IME :)')
        goucima = goucima_parser(gouci)
        debug_print('\t  add goucima into DB ')
        db.add_goucima(goucima)

    if ime_flag('pinyin_mode'):
        debug_print('\tLoad pinyin source \"%s\"' % _OPTIONS.pinyin)
        # parse_pinyin() reads the whole file, so it can be closed
        # as soon as the lines have been collected.
        with open_source(_OPTIONS.pinyin) as pinyin_s:
            debug_print('\tParsing pinyin source file ')
            pyline = parse_pinyin(pinyin_s)
        debug_print('\tPreapring pinyin entries')
        pinyin = pinyin_parser(pyline)
        debug_print('\t  add pinyin into DB ')
        db.add_pinyin(pinyin)

    debug_print('Optimizing database ')
    db.optimize_database()

    if ime_flag('user_can_define_phrase') and _OPTIONS.extra:
        debug_print('\tPreparing for adding extra words')
        db.create_indexes('main')
        debug_print('\tLoad extra words source "%s"' % _OPTIONS.extra)
        with open_source(_OPTIONS.extra) as extra_s:
            debug_print('\tParsing extra words source file ')
            extraline = parse_extra(extra_s)
        debug_print('\tPreparing extra words lines')
        extrawords = extra_parser(extraline)
        debug_print('\t  we have %d extra phrases from source' %
                    len(extrawords))
        # first get the entry of original phrases from
        # phrases-[(xingma, phrase, int(freq), 0)]
        orig_phrases = {
            "%s\t%s" % (phrase[0], phrase[1]): phrase
            for phrase in phrases}
        debug_print('\t  the len of orig_phrases is: %d' % len(orig_phrases))
        extra_phrases = {
            "%s\t%s" % (extraword[0], extraword[1]): extraword
            for extraword in extrawords}
        debug_print('\t  the len of extra_phrases is: %d' % len(extra_phrases))
        # Drop the entries already present in the table. A new dict is
        # built because a dict must not change size while it is being
        # iterated.
        extra_phrases = {
            key: extraword
            for key, extraword in extra_phrases.items()
            if key not in orig_phrases}
        debug_print('\t  %d extra phrases will be added' % len(extra_phrases))
        new_phrases = list(extra_phrases.values())
        debug_print('\tAdding extra words into DB ')
        db.add_phrases(new_phrases)
        debug_print('Optimizing database ')
        db.optimize_database()

    if _OPTIONS.index:
        debug_print('Create Indexes ')
        db.create_indexes('main')
    else:
        debug_print("We don't create an index on the database, " +
                    "you should only activate this function " +
                    "for distribution purposes.")
        db.drop_indexes('main')
    debug_print('Done! :D')