def setup_db():
    """Create and populate the dictionary database if it has no tables yet.

    Connects to ``cmn/cedict.db`` and queries ``sqlite_master`` for tables.
    When none exist, creates the ``entries`` table and inserts one row per
    record yielded by ``cedict.iter_cedict`` reading ``cmn/cedict_ts.u8``,
    writing progress messages to standard output. When at least one table
    already exists, the database is left untouched.

    The dictionary file and the database connection are both closed on
    every exit path, including when loading fails part-way.
    """
    conn = sqlite3.connect('cmn/cedict.db')
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        if not cursor.fetchall():  # db is empty
            sys.stdout.write("Initializing Database\n")
            sys.stdout.write(" reading dictionary data\n")
            # The file is opened before the table is created, so a missing
            # dictionary file does not leave an empty table behind.
            with open('cmn/cedict_ts.u8') as dictfile:
                # Create db and load data
                sys.stdout.write(" creating table\n")
                cursor.execute('''CREATE TABLE entries (traditional text,
                                  simplified text, pinyin text,
                                  definitions text, variants text,
                                  measure_word text)''')
                sys.stdout.write(" loading data into the database...\n")
                for ch, chs, py, ds, vs, mws in cedict.iter_cedict(dictfile):
                    # Flatten the structured fields into delimited strings:
                    # pinyin loses its spaces, list fields are "|"-joined.
                    _py = py.replace(' ', '')
                    _ds = "|".join(ds)
                    _vs = "|".join(
                        [k + ":" + v for d in vs for k, v in d.items()])
                    _mws = "|".join([";".join(mw) for mw in mws])
                    entry = (ch, chs, _py, _ds, _vs, _mws)
                    cursor.execute(
                        "INSERT INTO entries VALUES (?,?,?,?,?,?)", entry)
                # One commit for the whole load.
                conn.commit()
            sys.stdout.write("DONE\n")
    finally:
        conn.close()
def search_pinyin(word):
    """Return the set of pinyin strings for entries matching *word*.

    Scans every record yielded by ``cedict.iter_cedict`` over
    ``cmn/cedict_ts.u8`` and collects the third field (``py``) of each
    record whose first or second field equals *word*.

    *word* may be UTF-8 encoded ``bytes`` (decoded before comparing) or
    an already-decoded text string (used as is).

    Returns an empty set when nothing matches.
    """
    # Only bytes need decoding; calling .decode() on text would fail.
    if isinstance(word, bytes):
        uword = word.decode('utf-8')
    else:
        uword = word

    match = set()
    # The context manager closes the dictionary file even if parsing raises.
    with open('cmn/cedict_ts.u8') as dictfile:
        for ch, chs, py, _, _, _ in cedict.iter_cedict(dictfile):
            if uword == ch or uword == chs:
                match.add(py)
    return match
def get_cemap():
    """Return the module-level headword-to-definitions map, loading it once.

    On the first call (while the global ``__cemap`` is falsy) the file
    ``cedict/cedict.txt`` next to this module is parsed with
    ``cedict.iter_cedict``; for every record, its definitions are appended
    to the list stored under the record's first field (``ch``). Later calls
    return the cached dict without touching the file.

    The map is built in a local dict and only published to the global once
    the whole file has been read, so a failure part-way through never
    leaves a truncated cache behind for later callers.
    """
    global __cemap
    if not __cemap:
        cedict_path = Path(__file__).parent / "cedict/cedict.txt"
        loaded = {}
        # The context manager closes the file even if parsing raises.
        with open(cedict_path, "r", encoding="UTF-8") as infile:
            for ch, _chs, _pinyin, defs, _variants, _mw in cedict.iter_cedict(infile):
                loaded.setdefault(ch, []).extend(defs)
        __cemap = loaded
    return __cemap
def import_dictionary(file_name):
    """Store every entry of the dictionary file *file_name*.

    Each record yielded by ``cedict.iter_cedict`` is passed to
    ``store_def_boi`` under its traditional form, and additionally under
    its simplified form when the two forms differ.

    The file is closed on every exit path, including when parsing or
    storing raises.
    """
    with open(file_name) as infile:
        for ch_trad, ch_simp, trans, defs, _variants, _mw in cedict.iter_cedict(infile):
            store_def_boi(ch_trad, trans, defs)
            # If the traditional and the simple are the same then only
            # insert one; otherwise store the simplified form as well.
            if ch_simp != ch_trad:
                store_def_boi(ch_simp, trans, defs)
def setup_db(self):
    """Create the ``entries`` table in the database and fill it.

    Calls ``self.create_conf_dir()``, connects to the SQLite database at
    ``DB_PATH``, creates the ``entries`` table and inserts one row per
    record yielded by ``cedict.iter_cedict(self.args.infile)``.

    The connection is closed on every exit path, including when table
    creation (e.g. the table already exists) or an insert raises.
    """
    self.create_conf_dir()
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        # Create table
        cursor.execute('''CREATE TABLE entries (traditional text,
                          simplified text, pinyin text, definitions text,
                          variants text, measure_word text)''')
        for ch, chs, py, ds, vs, mws in cedict.iter_cedict(self.args.infile):
            # Flatten the structured fields into delimited strings:
            # pinyin loses its spaces, list fields are "|"-joined.
            _py = py.replace(' ', '')
            _ds = "|".join(ds)
            _vs = "|".join([k + ":" + v for d in vs for k, v in d.items()])
            _mws = "|".join([";".join(mw) for mw in mws])
            entry = (ch, chs, _py, _ds, _vs, _mws)
            cursor.execute("INSERT INTO entries VALUES (?,?,?,?,?,?)", entry)
        # One commit for the whole load.
        conn.commit()
    finally:
        conn.close()