def test_jaconv():
    """Time every jaconv conversion on each test case and log the output.

    For each case returned by ``get_test_cases()`` the conversions below are
    run in order: the label is logged at INFO level, the converter is timed
    with ``calc_time``, and the converted text is logged at DEBUG level.
    """
    logging.info("=========================================")
    logging.info("= jaconv =")
    logging.info("=========================================")

    # (log label, converter) pairs. Driving the loop from one table guarantees
    # that the converter whose result is logged is the one that was timed.
    conversions = (
        ("ひらがな(全角) to カタカナ(全角) for %s", jaconv.hira2kata),
        ("カタカナ(全角) to ひらがな(全角) for %s", jaconv.kata2hira),
        ("ひらがな(全角) to カタカナ(半角) for %s", jaconv.hira2hkata),
        ("半角 to 全角 for %s", jaconv.h2z),
        ("全角 to 半角 for %s", jaconv.z2h),
    )

    for tc in get_test_cases():
        title = tc['title']
        body = tc['body']
        for label, convert in conversions:
            logging.info(label, title)
            calc_time(convert, body)
            logging.debug("result: %s", convert(body))
def kataconv(content):
    """Normalise *content* and convert it to half-width characters.

    The user is asked whether kanji should be converted; on "Y"/"y" the text
    is first passed through ``mecab.parse``. Newlines are then removed, every
    "‖" is replaced by 32 spaces, and the text goes through
    ``jaconv.normalize``, ``jaconv.z2h`` and ``jaconv.hira2hkata``.

    Returns the converted string. If any step raises, the error is printed
    and ``None`` is returned.
    """
    try:
        # Disable mecab by commenting the following 3 lines
        answer = input("Convert kanji?: ")
        if answer in ("Y", "y"):
            content = mecab.parse(content)

        # "‖" acts as a line break; change the width here to fit.
        line_break = " " * 32
        flattened = content.replace("\n", "").replace("‖", line_break)

        converted = jaconv.hira2hkata(jaconv.z2h(jaconv.normalize(flattened)))
        print("Output content: " + converted)
        return converted
    except Exception as error:
        print(error)
        return None
def binconv(string):
    """Convert *string* into a list of integers, one per character.

    Each character is converted with ``jaconv.hira2hkata`` and ``jaconv.z2h``
    and then passed through ``convmap``. If the mapped value is a single
    character whose code point is <= 128, that code point is used; otherwise
    the mapped value is parsed as a hexadecimal number. A mapped value that
    cannot be converted contributes 255.

    Returns:
        A list of ints with the same length as *string*.
    """
    digi = []
    for char in string:
        mapped = convmap(jaconv.z2h(jaconv.hira2hkata(char)))
        try:
            # NOTE(review): the bound is 128 inclusive, as in the original,
            # although 128 itself is outside 7-bit ASCII -- confirm intent.
            if len(mapped) == 1 and ord(mapped) <= 128:
                digi.append(ord(mapped))
            else:
                digi.append(int(mapped, 16))
        except (TypeError, ValueError):
            # Only conversion failures fall back to 255; KeyboardInterrupt and
            # SystemExit are no longer swallowed as they were by a bare except.
            digi.append(255)
    return digi
def conductMain():
    """Import the selected CSV file into the ``data`` table of ``database.db``.

    The path is read from ``file1``. If its extension is ``.csv`` the file is
    read as Shift-JIS, the ``data`` table is created when missing, and one row
    is inserted per CSV record from the columns '得意先略称', '名カナ' and
    '備考' (the first character of each value is dropped). The kana column is
    converted with ``jaconv.hira2hkata`` and then ``jaconv.h2z``. Message
    boxes announce the start and the completion of the import.

    For any other extension a message box asks for a CSV file.

    NOTE(review): ``newwindow`` is destroyed on both paths here -- confirm
    that this matches the intended window flow.
    """
    file = file1.get()
    _, ext = os.path.splitext(file)

    if ext == '.csv':
        # Read the data and register it.
        df = pd.read_csv(file, encoding="shift-jis")
        # The database name is fixed for now.
        dbname = "database.db"
        c = sqlite3.connect(dbname)
        try:
            # IF NOT EXISTS replaces the previous blanket swallow of
            # sqlite3.OperationalError, which also hid unrelated failures.
            c.execute(
                'CREATE TABLE IF NOT EXISTS data ('
                'id INTEGER PRIMARY KEY AUTOINCREMENT, '
                'input_name STRING, input_name_kana STRING, content TEXT)')
            c.commit()

            tkinter.messagebox.showinfo(
                "Start", "データベースを作成します\n完了画面が出るまで何も触らずにお待ちください")

            insert_sql = (
                'INSERT INTO data(input_name, input_name_kana, content) '
                'VALUES (?, ?, ?)')
            for k in tqdm(range(len(df))):
                input_value = df.loc[k, '得意先略称'][1:]
                input_value_han_kana = df.loc[k, '名カナ'][1:]
                input_value_kana = jaconv.hira2hkata(input_value_han_kana)
                input_value_kana = jaconv.h2z(
                    input_value_kana, digit=True, ascii=True)
                content = df.loc[k, '備考'][1:]
                # Bound parameters: values containing quotes no longer break
                # the statement or get interpreted as SQL.
                c.execute(
                    insert_sql,
                    (str(input_value), str(input_value_kana), str(content)))
            # One commit for the whole import, so a failure part-way through
            # does not leave a half-imported file behind.
            c.commit()
        finally:
            c.close()

        tkinter.messagebox.showinfo("完了", "データベースが作成されました")
    else:
        # Anything other than CSV.
        tkinter.messagebox.showinfo("不適切なファイル形式です", "csvファイルを指定してください")

    newwindow.destroy()
def test_hira2hkata():
    """Check hiragana -> half-width katakana conversion, including ``ignore``."""
    # hira2hkata produces HALF-width katakana, so the expected strings must be
    # half-width too; characters listed in ``ignore`` are left untouched.
    assert_equal(jaconv.hira2hkata('ともえまみ'), 'ﾄﾓｴﾏﾐ')
    assert_equal(jaconv.hira2hkata('ともえまみ', ignore='み'), 'ﾄﾓｴﾏみ')
    _compare(jaconv.hira2hkata, HIRAGANA, HALF_KANA)
def main():
    """Convert the local-government code workbook into a nested JSON file.

    Reads ``args.input`` (an Excel workbook) and writes ``args.output``:

    * Columns A:E of the first sheet (first row skipped) hold prefectures and
      municipalities. A row with an empty municipality name is a prefecture;
      any other row is a municipality stored under its prefecture's
      ``cities``. A prefecture row must precede its municipalities.
    * Columns A:C of the second sheet hold the designated cities and their
      wards. A row whose name contains '区' is stored as a ward of the most
      recently seen designated city; any other row starts a new designated
      city under the prefecture's ``seirei``.

    When ``args.nation`` is truthy, a nationwide entry is added under key
    ``'0'``. The process exits with status 1 if the input file is missing.
    """
    input_filename = args.input
    output_filename = args.output
    with_nation = args.nation

    # validation
    if not os.path.exists(input_filename):
        print(input_filename, 'は存在しません。')
        sys.exit(1)

    # Holds the prefecture / municipality data.
    prefs = {}

    # Add the nationwide entry when requested.
    if with_nation:
        prefs['0'] = {
            "code": "0",
            "name": "全国",
            "yomi": "ぜんこく",
            "yomi_kana": "ゼンコク",
            # Half-width, like every other yomi_kana_han value.
            "yomi_kana_han": "ｾﾞﾝｺｸ",
            "cities": {},
            "seirei": {},
        }

    ##################################################
    #
    # Prefectures and municipalities
    # (excluding wards of designated cities)
    #
    ##################################################

    # Load the Excel sheet.
    df_city = pd.read_excel(input_filename, usecols='A:E', header=None,
                            skiprows=[0])

    # Rename the columns:
    # code, prefecture name, municipality name,
    # prefecture name (kana), municipality name (kana)
    df_city.rename(columns={
        0: 'code',
        1: 'pref_name',
        2: 'city_name',
        3: 'pref_yomi_kana',
        4: 'city_yomi_kana'
    }, inplace=True)

    # Process row by row.
    for _, row in df_city.iterrows():
        # Plain ints keep the dict keys JSON-serialisable.
        pref_code = int(row['code']) // 10000

        if pd.isna(row['city_name']):
            # Prefecture (the municipality cell is empty).
            prefs[pref_code] = {
                'code': str(pref_code),
                'name': row['pref_name'],
                'yomi': jaconv.kata2hira(jaconv.h2z(row['pref_yomi_kana'])),
                'yomi_kana': jaconv.h2z(row['pref_yomi_kana']),
                'yomi_kana_han': row['pref_yomi_kana'],
                'cities': {},
                'seirei': {},
            }
        else:
            # Municipality.
            city_code = int(row['code'])
            prefs[pref_code]['cities'][city_code] = {
                'code': str(city_code),
                'name': row['city_name'],
                'yomi': jaconv.kata2hira(jaconv.h2z(row['city_yomi_kana'])),
                'yomi_kana': jaconv.h2z(row['city_yomi_kana']),
                'yomi_kana_han': row['city_yomi_kana']
            }

    ####################
    #
    # Designated cities
    #
    ####################

    # Load the Excel sheet.
    df_seirei = pd.read_excel(input_filename, usecols='A:C', sheet_name=1,
                              header=None)

    # Rename the columns:
    # code, municipality name, municipality name (hiragana)
    df_seirei.rename(columns={
        0: 'code',
        1: 'city_name',
        2: 'city_yomi'
    }, inplace=True)

    # Process row by row; seirei_code tracks the designated city that the
    # following ward rows belong to.
    seirei_code = 0
    for _, row in df_seirei.iterrows():
        pref_code = int(row['code']) // 10000

        if '区' in row['city_name']:
            # Ward of a designated city.
            city_code = int(row['code'])
            prefs[pref_code]['seirei'][seirei_code][city_code] = {
                'code': str(city_code),
                'parent_seirei_code': str(seirei_code),
                'name': row['city_name'],
                'yomi': row['city_yomi'],
                'yomi_kana': jaconv.hira2kata(row['city_yomi']),
                'yomi_kana_han': jaconv.hira2hkata(row['city_yomi'])
            }
        else:
            # Designated city.
            seirei_code = int(row['code'])
            prefs[pref_code]['seirei'][seirei_code] = {}

    ########################
    #
    # Write the JSON file
    #
    ########################
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(prefs, f, indent=2, ensure_ascii=False)