def __create_engine_cb(self, factory, engine_name):
    '''Factory callback creating the ibus-table test engine.

    Returns a TabEngine connected to the test bus, or None when the
    requested name does not match or the required ibus-table modules
    could not be imported (the Gtk main loop is stopped in that case).
    '''
    if engine_name != ENGINE_NAME:
        return None
    modules_ok = (IMPORT_TABLE_SUCCESSFUL
                  and IMPORT_TABSQLITEDB_SUCCESSFUL)
    if not modules_ok:
        with self.subTest(i='create-engine'):
            self.fail('NG: ibus-table not installed?')
        Gtk.main_quit()
        return None
    self.__id += 1
    object_path = '%s/%d' % (self.ENGINE_PATH, self.__id)
    table_db_file = os.path.join('/usr/share/ibus-table/tables',
                                 engine_name + '.db')
    database = tabsqlitedb.TabSqliteDb(
        filename=table_db_file, user_db=':memory:', unit_test=True)
    self.__engine = table.TabEngine(self.__bus, object_path, database)
    self.__engine.connect('focus-in', self.__engine_focus_in)
    self.__engine.connect('focus-out', self.__engine_focus_out)
    # FIXME: Need to connect 'reset' after
    # TabEngine.clear_all_input_and_preedit() is called.
    self.__engine.connect_after('reset', self.__engine_reset)
    self.__bus.get_connection().signal_subscribe(
        None,
        IBus.INTERFACE_ENGINE,
        'UpdateLookupTable',
        object_path,
        None,
        0,
        self.__bus_signal_cb,
        self.__bus)
    return self.__engine
def do_create_engine(self, engine_name):
    '''Create (or reuse a cached) TypingBoosterEngine for engine_name.

    Databases and engines are cached per engine name, so repeated
    requests for the same name return the same objects.

    :param engine_name: Name of the engine requested over D-Bus.
    :raises Exception: if the engine could not be created.
    '''
    if DEBUG_LEVEL > 1:
        sys.stderr.write(
            "EngineFactory.do_create_engine(engine_name=%s)\n"
            % engine_name)
    engine_base_path = "/com/redhat/IBus/engines/table/%s/engine/"
    # D-Bus object paths may only contain [A-Za-z0-9_/].
    engine_path = engine_base_path % re.sub(
        r'[^a-zA-Z0-9_/]', '_', engine_name)
    try:
        if engine_name in self.dbdict:
            self.db = self.dbdict[engine_name]
        else:
            self.db = tabsqlitedb.TabSqliteDb()
            self.dbdict[engine_name] = self.db
        if engine_name in self.enginedict:
            engine = self.enginedict[engine_name]
        else:
            engine = hunspell_table.TypingBoosterEngine(
                self.bus, engine_path + str(self.engine_id), self.db)
            self.enginedict[engine_name] = engine
            self.engine_id += 1
        return engine
    except Exception as error:
        # Narrowed from a bare "except:" so KeyboardInterrupt and
        # SystemExit are not swallowed; chain the original cause.
        print("failed to create engine %s" % engine_name)
        import traceback
        traceback.print_exc()
        raise Exception(
            "Cannot create engine %s" % engine_name) from error
def __init__(self, bus, db=''):
    '''Initialize the engine factory.

    :param bus: The IBus bus object to register this factory on.
    :param db: Full path to the sql database; when empty, databases
               are created lazily in do_create_engine().
    '''
    global DEBUG_LEVEL
    try:
        DEBUG_LEVEL = int(os.getenv('IBUS_TABLE_DEBUG_LEVEL'))
    except (TypeError, ValueError):
        # Environment variable unset or not an integer.
        DEBUG_LEVEL = 0
    if DEBUG_LEVEL > 1:
        LOGGER.debug('EngineFactory.__init__(bus=%s, db=%s)\n', bus, db)
    # db is the full path to the sql database
    if db:
        base_name = os.path.basename(db)
        self.dbusname = base_name.replace('.db', '')
        self.db = tabsqlitedb.TabSqliteDb(
            filename=db, user_db=base_name.replace('.db', '-user.db'))
        self.db.db.commit()
        self.dbdict = {self.dbusname: self.db}
    else:
        self.db = None
        self.dbdict = {}
    # init factory
    self.bus = bus
    super(EngineFactory, self).__init__(
        connection=bus.get_connection(), object_path=IBus.PATH_FACTORY)
    self.engine_id = 0
    self.engine_path = ''
def init_database(
        self,
        user_db_file: str = ':memory:',
        dictionary_names: Iterable[str] = ('en_US',)) -> None:
    '''Create the sqlite database and set the hunspell dictionaries.

    :param user_db_file: Path of the user database (in-memory default).
    :param dictionary_names: Hunspell dictionaries to load.
    '''
    self.database = tabsqlitedb.TabSqliteDb(user_db_file=user_db_file)
    names = list(dictionary_names)
    self.database.hunspell_obj.set_dictionary_names(names)
def do_create_engine(
        self, engine_name: str) -> hunspell_table.TypingBoosterEngine:
    '''Create (or reuse a cached) TypingBoosterEngine for engine_name.

    Databases and engines are cached per engine name, so repeated
    requests for the same name return the same objects.

    :param engine_name: Name of the engine requested over D-Bus.
    :raises Exception: if the engine could not be created.
    '''
    if DEBUG_LEVEL > 1:
        LOGGER.debug('EngineFactory.do_create_engine(engine_name=%s)\n',
                     engine_name)
    engine_base_path = "/com/redhat/IBus/engines/table/%s/engine/"
    # D-Bus object paths may only contain [A-Za-z0-9_/].
    engine_path = engine_base_path % re.sub(
        r'[^a-zA-Z0-9_/]', '_', engine_name)
    try:
        if engine_name in self.database_dict:
            self.database = self.database_dict[engine_name]
        else:
            self.database = tabsqlitedb.TabSqliteDb()
            self.database_dict[engine_name] = self.database
        if engine_name in self.enginedict:
            engine = self.enginedict[engine_name]
        else:
            engine = hunspell_table.TypingBoosterEngine(
                self.bus, engine_path + str(self.engine_id), self.database)
            self.enginedict[engine_name] = engine
            self.engine_id += 1
        return engine
    except Exception as error:
        print(f'failed to create engine {engine_name}: '
              f'{error.__class__.__name__}: {error}')
        import traceback
        traceback.print_exc()
        # Give the raised Exception a message: a bare
        # "raise Exception from error" produces an empty str().
        raise Exception(
            f'Cannot create engine {engine_name}') from error
def do_create_engine(self, engine_name):
    '''Create a TabEngine for engine_name, loading and caching its
    table database first.

    :param engine_name: Requested engine name, optionally prefixed
                        with 'table:'.
    :raises Exception: if the engine could not be created.
    '''
    engine_name = re.sub(r'^table:', '', engine_name)
    engine_base_path = "/com/redhat/IBus/engines/table/%s/engine/"
    # D-Bus object paths may only contain [A-Za-z0-9_/].
    path_patt = re.compile(r'[^a-zA-Z0-9_/]')
    self.engine_path = engine_base_path % path_patt.sub('_', engine_name)
    try:
        if not self.db:
            # first check self.dbdict
            if engine_name not in self.dbdict:
                ibus_table_location = os.getenv('IBUS_TABLE_LOCATION')
                if ibus_table_location:
                    db_dir = os.path.join(ibus_table_location, 'tables')
                else:
                    # Env var unset or empty: use the system location.
                    # (Replaces a bare "except:" that only guarded
                    # against os.path.join(None, ...) raising.)
                    db_dir = "/usr/share/ibus-table/tables"
                db = os.path.join(db_dir, engine_name + '.db')
                udb = engine_name + '-user.db'
                if not os.path.exists(db):
                    # Fall back to the user's build-your-own tables.
                    # expanduser avoids a crash when HOME is unset.
                    byo_db_dir = os.path.join(
                        os.path.expanduser('~'), '.ibus/byo-tables')
                    db = os.path.join(byo_db_dir, engine_name + '.db')
                _sq_db = tabsqlitedb.TabSqliteDb(filename=db, user_db=udb)
                _sq_db.db.commit()
                self.dbdict[engine_name] = _sq_db
        engine = table.TabEngine(
            self.bus,
            self.engine_path + str(self.engine_id),
            self.dbdict[engine_name])
        self.engine_id += 1
        #return engine.get_dbus_object()
        return engine
    except Exception as error:
        # Narrowed from a bare "except:"; chain the original cause.
        print("failed to create engine %s" % engine_name)
        import traceback
        traceback.print_exc()
        raise Exception(
            "Cannot create engine %s" % engine_name) from error
def __create_engine_cb(self, factory, engine_name):
    '''Factory callback creating the typing-booster test engine.

    Returns a TypingBoosterEngine on the test bus, or None when the
    requested name does not match or the required modules could not be
    imported (the Gtk main loop is stopped in that case).
    '''
    if engine_name != 'testTyping-booster':
        return None
    modules_ok = (IMPORT_HUNSPELL_SUCCESSFUL
                  and IMPORT_TABSQLITEDB_SUCCESSFUL)
    if not modules_ok:
        with self.subTest(i='create-engine'):
            self.fail('NG: ibus-typing-booster not installed?')
        Gtk.main_quit()
        return None
    self.__id += 1
    object_path = '%s/%d' % (self.ENGINE_PATH, self.__id)
    database = tabsqlitedb.TabSqliteDb(user_db_file=':memory:')
    self.__engine = hunspell_table.TypingBoosterEngine(
        self.__bus, object_path, database)
    self.__engine.connect('focus-in', self.__engine_focus_in)
    self.__engine.connect('focus-out', self.__engine_focus_out)
    # Need to connect 'reset' after TypingBoosterEngine._clear_input()
    # is called.
    self.__engine.connect_after('reset', self.__engine_reset)
    self.__bus.get_connection().signal_subscribe(
        None,
        IBus.INTERFACE_ENGINE,
        'UpdateLookupTable',
        object_path,
        None,
        0,
        self.__bus_signal_cb,
        self.__bus)
    return self.__engine
def setUp(self):
    '''Create a fresh in-memory engine before each test and make sure
    settings start from known defaults (originals are backed up).
    '''
    self.bus = IBus.Bus()
    self.db = tabsqlitedb.TabSqliteDb(user_db_file=':memory:')
    self.engine = TypingBoosterEngine(
        self.bus,
        '/com/redhat/IBus/engines/table/typing_booster/engine/0',
        self.db,
        unit_test=True)
    self.backup_original_settings()
    self.set_default_settings()
def set_up(engine_name):
    '''Create the module-global TabEngine for engine_name with an
    in-memory user database and reset its settings to the defaults
    (originals are backed up first).
    '''
    global TABSQLITEDB
    global ENGINE
    bus = IBus.Bus()
    db_file = os.path.join('/usr/share/ibus-table/tables',
                           engine_name + '.db')
    TABSQLITEDB = tabsqlitedb.TabSqliteDb(
        filename=db_file, user_db=':memory:')
    ENGINE = TabEngine(
        bus,
        '/com/redhat/IBus/engines/table/%s/engine/0' % engine_name,
        TABSQLITEDB,
        unit_test=True)
    backup_original_settings()
    set_default_settings()
def __init__(self, bus, db=''):
    '''Initialize the engine factory.

    :param bus: The IBus bus object to register this factory on.
    :param db: Full path to the sql database; when empty, databases
               are created lazily in do_create_engine().
    '''
    # db is the full path to the sql database
    if db:
        base_name = os.path.basename(db)
        self.dbusname = base_name.replace('.db', '')
        self.db = tabsqlitedb.TabSqliteDb(
            filename=db, user_db=base_name.replace('.db', '-user.db'))
        self.db.db.commit()
        self.dbdict = {self.dbusname: self.db}
    else:
        self.db = None
        self.dbdict = {}
    # init factory
    self.bus = bus
    super(EngineFactory, self).__init__(
        connection=bus.get_connection(), object_path=IBus.PATH_FACTORY)
    self.engine_id = 0
    self.engine_path = ''
def main():
    '''Program entry point.

    With --xml, print an XML description of all installed table
    engines to stdout and return 0.  Otherwise start the input-method
    application (optionally daemonized) and run its main loop until
    SIGTERM/SIGINT or Ctrl-C.
    '''
    if _OPTIONS.xml:
        from locale import getdefaultlocale
        from xml.etree.ElementTree import Element, SubElement, tostring
        # we will output the engines xml and return.
        # 1. we find all dbs in DB_DIR (and the user's
        # build-your-own directory) and extract the infos into Elements
        _all_dbs = [os.path.join(DB_DIR, name)
                    for name in os.listdir(DB_DIR)
                    if name.endswith('.db')]
        try:
            _all_dbs += [os.path.join(BYO_DB_DIR, name)
                         for name in os.listdir(BYO_DB_DIR)
                         if name.endswith('.db')]
        except OSError:
            # BYO_DB_DIR does not exist or is not accessible
            pass
        egs = Element('engines')
        for _db in _all_dbs:
            _sq_db = tabsqlitedb.TabSqliteDb(_db, user_db=None)
            _engine = SubElement(egs, 'engine')
            _name = SubElement(_engine, 'name')
            engine_name = os.path.basename(_db).replace('.db', '')
            _name.text = 'table:' + engine_name
            setup_arg = "{} --engine-name {}".format(SETUP_CMD, _name.text)
            _longname = SubElement(_engine, 'longname')
            _longname.text = ''
            # getdefaultlocale() returns something like ('ja_JP', 'UTF-8').
            # In case of C/POSIX locale it returns (None, None)
            _locale = getdefaultlocale()[0]
            if _locale:
                _locale = _locale.lower()
            else:
                _locale = 'en'
            # Prefer the name for the full locale, then the bare
            # language, then the generic name, then the file name:
            _longname.text = _sq_db.ime_properties.get(
                '.'.join(['name', _locale]))
            if not _longname.text:
                _longname.text = _sq_db.ime_properties.get(
                    '.'.join(['name', _locale.split('_')[0]]))
            if not _longname.text:
                _longname.text = _sq_db.ime_properties.get('name')
            if not _longname.text:
                _longname.text = engine_name
            _language = SubElement(_engine, 'language')
            _langs = _sq_db.ime_properties.get('languages')
            if _langs:
                _langs = _langs.split(',')
                if len(_langs) == 1:
                    _language.text = _langs[0].strip()
                else:
                    # we ignore the place
                    _language.text = _langs[0].strip().split('_')[0]
            _license = SubElement(_engine, 'license')
            _license.text = _sq_db.ime_properties.get('license')
            _author = SubElement(_engine, 'author')
            _author.text = _sq_db.ime_properties.get('author')
            _icon = SubElement(_engine, 'icon')
            _icon_basename = _sq_db.ime_properties.get('icon')
            if _icon_basename:
                _icon.text = os.path.join(ICON_DIR, _icon_basename)
            _layout = SubElement(_engine, 'layout')
            _layout.text = _sq_db.ime_properties.get('layout')
            _symbol = SubElement(_engine, 'symbol')
            _symbol.text = _sq_db.ime_properties.get('symbol')
            _desc = SubElement(_engine, 'description')
            _desc.text = _sq_db.ime_properties.get('description')
            _setup = SubElement(_engine, 'setup')
            _setup.text = setup_arg
            _icon_prop_key = SubElement(_engine, 'icon_prop_key')
            _icon_prop_key.text = 'InputMode'
        # now format the xmlout pretty
        indent(egs)
        egsout = tostring(egs, encoding='utf8').decode('utf-8')
        egsout = re.sub(r'<\?.*\?>\n', '', egsout)
        # Always write xml output in UTF-8 encoding, not in the
        # encoding of the current locale, otherwise it might fail
        # if conversion into the encoding of the current locale is
        # not possible:
        if sys.version_info >= (3, 0, 0):
            sys.stdout.buffer.write((egsout + '\n').encode('utf-8'))
        else:
            sys.stdout.write((egsout + '\n').encode('utf-8'))
        return 0
    if _OPTIONS.daemon:
        # Daemonize: the parent exits, the forked child keeps running.
        if os.fork():
            sys.exit()
    if _OPTIONS.db:
        if os.access(_OPTIONS.db, os.F_OK):
            db = _OPTIONS.db
        else:
            # Not an accessible path: look the file up in DB_DIR.
            db = os.path.join(DB_DIR, os.path.basename(_OPTIONS.db))
    else:
        db = ""
    ima = IMApp(db, _OPTIONS.ibus)
    signal(SIGTERM, lambda signum, stack_frame: cleanup(ima))
    signal(SIGINT, lambda signum, stack_frame: cleanup(ima))
    try:
        ima.run()
    except KeyboardInterrupt:
        ima.quit()
def get_default_options_from_database(self):
    '''
    If there are default options in the database, they override the
    defaults from Gsettings.
    '''
    self.tabsqlitedb = tabsqlitedb.TabSqliteDb(
        filename=os.path.join(
            DB_DIR,
            re.sub(r'^table:', '', self.__engine_name) + '.db'),
        user_db=None,
        create_database=False)
    self.__is_chinese = False
    self.__is_cjk = False
    languages = self.tabsqlitedb.ime_properties.get('languages')
    if languages:
        languages = languages.split(',')
        for language in languages:
            if language.strip().startswith('zh'):
                self.__is_chinese = True
            for lang in ['zh', 'ja', 'ko']:
                if language.strip().startswith(lang):
                    self.__is_cjk = True

    def _is_true_false_string(value):
        # True if value is a str equal to 'true' or 'false'
        # (case-insensitive).  Replaces the repeated
        # "type(x) == type(u'')" anti-pattern with isinstance().
        return (isinstance(value, str)
                and value.lower() in (u'true', u'false'))

    self.__user_can_define_phrase = False
    # NOTE(review): this key uses a hyphen ('user_can-define_phrase')
    # while other code in this project reads 'user_can_define_phrase' —
    # verify which spelling the table databases actually use.
    user_can_define_phrase = self.tabsqlitedb.ime_properties.get(
        'user_can-define_phrase')
    if user_can_define_phrase:
        self.__user_can_define_phrase = (
            user_can_define_phrase.lower() == u'true')
    self.__rules = self.tabsqlitedb.ime_properties.get('rules')
    language_filter = self.tabsqlitedb.ime_properties.get(
        'language_filter')
    if language_filter in ('cm0', 'cm1', 'cm2', 'cm3', 'cm4'):
        OPTION_DEFAULTS['chinesemode'] = int(language_filter[-1])
    def_full_width_punct = self.tabsqlitedb.ime_properties.get(
        'def_full_width_punct')
    if _is_true_false_string(def_full_width_punct):
        enabled = def_full_width_punct.lower() == u'true'
        OPTION_DEFAULTS['tabdeffullwidthpunct'] = enabled
        OPTION_DEFAULTS['endeffullwidthpunct'] = enabled
    def_full_width_letter = self.tabsqlitedb.ime_properties.get(
        'def_full_width_letter')
    if _is_true_false_string(def_full_width_letter):
        enabled = def_full_width_letter.lower() == u'true'
        OPTION_DEFAULTS['tabdeffullwidthletter'] = enabled
        OPTION_DEFAULTS['endeffullwidthletter'] = enabled
    always_show_lookup = self.tabsqlitedb.ime_properties.get(
        'always_show_lookup')
    if _is_true_false_string(always_show_lookup):
        OPTION_DEFAULTS['alwaysshowlookup'] = (
            always_show_lookup.lower() == u'true')
    select_keys_csv = self.tabsqlitedb.ime_properties.get('select_keys')
    if select_keys_csv:
        # select_keys_csv is something like: "1,2,3,4,5,6,7,8,9,0"
        OPTION_DEFAULTS['lookuptablepagesize'] = len(
            select_keys_csv.split(","))
    auto_select = self.tabsqlitedb.ime_properties.get('auto_select')
    if _is_true_false_string(auto_select):
        OPTION_DEFAULTS['autoselect'] = auto_select.lower() == u'true'
    auto_commit = self.tabsqlitedb.ime_properties.get('auto_commit')
    if _is_true_false_string(auto_commit):
        OPTION_DEFAULTS['autocommit'] = auto_commit.lower() == u'true'
    orientation = self.tabsqlitedb.get_orientation()
    OPTION_DEFAULTS['lookuptableorientation'] = orientation
    # if space is a page down key, set the option
    # “spacekeybehavior” to “True”:
    page_down_keys_csv = self.tabsqlitedb.ime_properties.get(
        'page_down_keys')
    if page_down_keys_csv:
        self._page_down_keys = [
            IBus.keyval_from_name(x)
            for x in page_down_keys_csv.split(',')]
        if IBus.KEY_space in self._page_down_keys:
            OPTION_DEFAULTS['spacekeybehavior'] = True
    # if space is a commit key, set the option
    # “spacekeybehavior” to “False” (overrides if space is
    # also a page down key):
    commit_keys_csv = self.tabsqlitedb.ime_properties.get('commit_keys')
    if commit_keys_csv:
        self._commit_keys = [
            IBus.keyval_from_name(x)
            for x in commit_keys_csv.split(',')]
        if IBus.KEY_space in self._commit_keys:
            OPTION_DEFAULTS['spacekeybehavior'] = False
    auto_wildcard = self.tabsqlitedb.ime_properties.get('auto_wildcard')
    if _is_true_false_string(auto_wildcard):
        OPTION_DEFAULTS['autowildcard'] = auto_wildcard.lower() == u'true'
    single_wildcard_char = self.tabsqlitedb.ime_properties.get(
        'single_wildcard_char')
    if isinstance(single_wildcard_char, str) and single_wildcard_char:
        # Only a single character is allowed; truncate if necessary.
        OPTION_DEFAULTS['singlewildcardchar'] = single_wildcard_char[:1]
    multi_wildcard_char = self.tabsqlitedb.ime_properties.get(
        'multi_wildcard_char')
    if isinstance(multi_wildcard_char, str) and multi_wildcard_char:
        # Only a single character is allowed; truncate if necessary.
        OPTION_DEFAULTS['multiwildcardchar'] = multi_wildcard_char[:1]
def main():
    '''Build a table database from an ime source file.

    Parses the table source (and optional pinyin / extra-phrases
    sources) given in _OPTIONS and fills the sqlite database; with
    --only-index, merely optimizes an existing database and
    (re)creates its indexes.
    '''
    def debug_print(message):
        # Progress output only with --debug.
        if _OPTIONS.debug:
            print(message)

    if not _OPTIONS.only_index:
        try:
            os.unlink(_OPTIONS.name)
        except OSError:
            # Database file did not exist yet — nothing to remove.
            # (Narrowed from a bare "except:".)
            pass

    debug_print('Processing Database')
    db = tabsqlitedb.TabSqliteDb(
        filename=_OPTIONS.name, user_db=None, create_database=True)

    def parse_source(f):
        '''Split the table source lines into attribute lines,
        table lines, and goucima lines.'''
        _attri = []
        _table = []
        _gouci = []
        patt_com = re.compile(r'^###.*')
        patt_blank = re.compile(r'^[ \t]*$')
        patt_conf = re.compile(r'[^\t]*=[^\t]*')
        patt_table = re.compile(r'([^\t]+)\t([^\t]+)\t([0-9]+)(\t.*)?$')
        patt_gouci = re.compile(r' *[^\s]+ *\t *[^\s]+ *$')
        for line in f:
            if (not patt_com.match(line)) and (not patt_blank.match(line)):
                for _patt, _list in ((patt_table, _table),
                                     (patt_gouci, _gouci),
                                     (patt_conf, _attri)):
                    if _patt.match(line):
                        _list.append(line)
                        break
        if not _gouci:
            # The user didn’t provide goucima (goucima = 構詞碼 =
            # “word formation keys”) in the table source, so we use
            # the longest encoding for a single character as the
            # goucima for that character.
            #
            # Example:
            #
            # wubi-jidian86.txt contains:
            #
            # a 工 99454797
            # aaa 工 551000000
            # aaaa 工 551000000
            # aaad 工期 5350000
            # ... and more matches for compounds containing 工
            #
            # The longest key sequence to type 工 as a single
            # character is “aaaa”. Therefore, the goucima of 工 is
            # “aaaa” (There is one other character with the same
            # goucima in wubi-jidian86.txt, 㠭 also has the goucima
            # “aaaa”).
            gouci_dict = {}
            for line in _table:
                res = patt_table.match(line)
                if res and len(res.group(2)) == 1:
                    if res.group(2) in gouci_dict:
                        if (len(res.group(1))
                                > len(gouci_dict[res.group(2)])):
                            gouci_dict[res.group(2)] = res.group(1)
                    else:
                        gouci_dict[res.group(2)] = res.group(1)
            for key in gouci_dict:
                _gouci.append('%s\t%s' % (key, gouci_dict[key]))
            _gouci.sort()
        return (_attri, _table, _gouci)

    def parse_pinyin(f):
        '''Expand pinyin source lines into one line per reading.'''
        _pinyins = []
        patt_com = re.compile(r'^#.*')
        patt_blank = re.compile(r'^[ \t]*$')
        patt_py = re.compile(r'(.*)\t(.*)\t(.*)')
        patt_yin = re.compile(r'[a-z]+[1-5]')
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if (not patt_com.match(line)) and (not patt_blank.match(line)):
                res = patt_py.match(line)
                if res:
                    yins = patt_yin.findall(res.group(2))
                    for yin in yins:
                        _pinyins.append(
                            "%s\t%s\t%s"
                            % (res.group(1), yin, res.group(3)))
        return _pinyins[:]

    def parse_extra(f):
        '''Return the non-comment, non-blank extra-phrases lines.'''
        _extra = []
        patt_com = re.compile(r'^###.*')
        patt_blank = re.compile(r'^[ \t]*$')
        patt_extra = re.compile(r'(.*)\t(.*)')
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if (not patt_com.match(line)) and (not patt_blank.match(line)):
                if patt_extra.match(line):
                    _extra.append(line)
        return _extra

    def pinyin_parser(f):
        '''Yield (pinyin, character, frequency) tuples.'''
        for pinyin_line in f:
            if not isinstance(pinyin_line, str):
                pinyin_line = pinyin_line.decode('utf-8')
            _zi, _pinyin, _freq = pinyin_line.strip().split()
            yield (_pinyin, _zi, _freq)

    def phrase_parser(f):
        '''Return [(tabkeys, phrase, freq, 0), ...] from table lines.'''
        phrase_list = []
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            xingma, phrase, freq = line.split('\t')[:3]
            if phrase == 'NOSYMBOL':
                phrase = u''
            phrase_list.append((xingma, phrase, int(freq), 0))
        return phrase_list

    def goucima_parser(f):
        '''Yield (character, goucima) tuples.'''
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            zi, gcm = line.strip().split()
            yield (zi, gcm)

    def attribute_parser(f):
        '''Yield (attribute, value) tuples from "attr = value" lines.'''
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            try:
                attr, val = line.strip().split('=')
            except ValueError:
                # Not exactly one '=': assume a '==' separator.
                # (Narrowed from a bare "except:".)
                attr, val = line.strip().split('==')
            attr = attr.strip().lower()
            val = val.strip()
            yield (attr, val)

    def extra_parser(f):
        '''Return [(tabkeys, phrase, freq, 0), ...] for extra words
        whose tabkeys can be derived from the database.'''
        extra_list = []
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            phrase, freq = line.strip().split()
            _tabkey = db.parse_phrase(phrase)
            if _tabkey:
                extra_list.append((_tabkey, phrase, freq, 0))
            else:
                print('No tabkeys found for “%s”, not adding.\n' % phrase)
        return extra_list

    def get_char_prompts(f):
        '''
        Returns something like
        ("char_prompts", "{'a': '日', 'b': '日', 'c': '金', ...}")
        i.e. the attribute name "char_prompts" and as its value the
        string representation of a Python dictionary.
        '''
        char_prompts = {}
        start = False
        for line in f:
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if re.match(r'^BEGIN_CHAR_PROMPTS_DEFINITION', line):
                start = True
                continue
            if not start:
                continue
            if re.match(r'^END_CHAR_PROMPTS_DEFINITION', line):
                break
            match = re.search(
                r'^(?P<char>[^\s]+)[\s]+(?P<prompt>[^\s]+)', line)
            if match:
                char_prompts[match.group('char')] = match.group('prompt')
        return ("char_prompts", repr(char_prompts))

    if _OPTIONS.only_index:
        debug_print('Only create Indexes')
        debug_print('Optimizing database ')
        db.optimize_database()
        debug_print('Create Indexes ')
        db.create_indexes('main')
        debug_print('Done! :D')
        return 0

    # now we parse the ime source file
    debug_print('\tLoad sources "%s"' % _OPTIONS.source)
    patt_s = re.compile(r'.*\.bz2')
    if patt_s.match(_OPTIONS.source):
        # BZ2File.read() returns bytes; decode so the string
        # operations below work (str.replace('\r\n', ...) would
        # raise TypeError on a bytes object).
        with bz2.BZ2File(_OPTIONS.source, "r") as source_file:
            source = source_file.read().decode('utf-8')
    else:
        with open(_OPTIONS.source, mode='r',
                  encoding='UTF-8') as source_file:
            source = source_file.read()
    source = source.replace('\r\n', '\n')
    source = source.split('\n')
    # first get config line and table line and goucima line respectively
    debug_print('\tParsing table source file ')
    attri, table, gouci = parse_source(source)
    debug_print('\t get attribute of IME :)')
    attributes = list(attribute_parser(attri))
    attributes.append(get_char_prompts(source))
    debug_print('\t add attributes into DB ')
    db.update_ime(attributes)
    db.create_tables('main')
    # second, we use generators for database generating:
    debug_print('\t get phrases of IME :)')
    phrases = phrase_parser(table)
    # now we add things into db
    debug_print('\t add phrases into DB ')
    db.add_phrases(phrases)
    # Guard against the attribute missing entirely (the original code
    # would raise AttributeError calling .lower() on None):
    user_can_define_phrase = (
        db.ime_properties.get('user_can_define_phrase') or '')
    if user_can_define_phrase.lower() == u'true':
        debug_print('\t get goucima of IME :)')
        goucima = goucima_parser(gouci)
        debug_print('\t add goucima into DB ')
        db.add_goucima(goucima)
    if (db.ime_properties.get('pinyin_mode') or '').lower() == u'true':
        debug_print('\tLoad pinyin source \"%s\"' % _OPTIONS.pinyin)
        if patt_s.match(_OPTIONS.pinyin):
            pinyin_s = bz2.BZ2File(_OPTIONS.pinyin, "r")
        else:
            pinyin_s = open(_OPTIONS.pinyin, 'r')
        debug_print('\tParsing pinyin source file ')
        pyline = parse_pinyin(pinyin_s)
        pinyin_s.close()
        debug_print('\tPreapring pinyin entries')
        pinyin = pinyin_parser(pyline)
        debug_print('\t add pinyin into DB ')
        db.add_pinyin(pinyin)
    debug_print('Optimizing database ')
    db.optimize_database()
    if user_can_define_phrase.lower() == u'true' and _OPTIONS.extra:
        debug_print('\tPreparing for adding extra words')
        db.create_indexes('main')
        debug_print('\tLoad extra words source "%s"' % _OPTIONS.extra)
        if patt_s.match(_OPTIONS.extra):
            extra_s = bz2.BZ2File(_OPTIONS.extra, 'r')
        else:
            extra_s = open(_OPTIONS.extra, 'r')
        debug_print('\tParsing extra words source file ')
        extraline = parse_extra(extra_s)
        extra_s.close()
        debug_print('\tPreparing extra words lines')
        extrawords = extra_parser(extraline)
        debug_print('\t we have %d extra phrases from source'
                    % len(extrawords))
        # first get the entry of original phrases from
        # phrases-[(xingma, phrase, int(freq), 0)]
        orig_phrases = {}
        for phrase in phrases:
            orig_phrases.update(
                {"%s\t%s" % (phrase[0], phrase[1]): phrase})
        debug_print('\t the len of orig_phrases is: %d'
                    % len(orig_phrases))
        extra_phrases = {}
        for extraword in extrawords:
            extra_phrases.update(
                {"%s\t%s" % (extraword[0], extraword[1]): extraword})
        debug_print('\t the len of extra_phrases is: %d'
                    % len(extra_phrases))
        # pop duplicated keys — iterate over a snapshot of the keys:
        # mutating a dict while iterating it raises RuntimeError on
        # Python 3 (the original code did exactly that).
        for phrase in list(extra_phrases):
            if phrase in orig_phrases:
                extra_phrases.pop(phrase)
        debug_print('\t %d extra phrases will be added'
                    % len(extra_phrases))
        new_phrases = list(extra_phrases.values())
        debug_print('\tAdding extra words into DB ')
        db.add_phrases(new_phrases)
        debug_print('Optimizing database ')
        db.optimize_database()
    if _OPTIONS.index:
        debug_print('Create Indexes ')
        db.create_indexes('main')
    else:
        debug_print("We don't create an index on the database, "
                    + "you should only activate this function "
                    + "for distribution purposes.")
        db.drop_indexes('main')
    debug_print('Done! :D')