Ejemplo n.º 1
0
def test_jaconv():
    """Time every jaconv conversion against each test case and log the output.

    For each case returned by ``get_test_cases()`` (a mapping with ``'title'``
    and ``'body'`` keys), every conversion is timed through ``calc_time`` and
    its result is logged at DEBUG level.
    """
    logging.info("=========================================")
    logging.info("=               jaconv                  =")
    logging.info("=========================================")

    # (log label, converter) pairs. Driving both the timing call and the
    # result log from the same entry guarantees that the logged result always
    # comes from the function that was just timed.
    conversions = (
        ("ひらがな(全角) to カタカナ(全角)", jaconv.hira2kata),
        ("カタカナ(全角) to ひらがな(全角)", jaconv.kata2hira),
        ("ひらがな(全角) to カタカナ(半角)", jaconv.hira2hkata),
        ("半角 to 全角", jaconv.h2z),
        ("全角 to 半角", jaconv.z2h),
    )

    for tc in get_test_cases():
        title = tc['title']
        body = tc['body']

        for label, convert in conversions:
            logging.info("%s for %s", label, title)
            calc_time(convert, body)
            logging.debug("result: %s", convert(body))
def kataconv(content):
    """Normalize ``content`` and convert it to half-width form.

    Asks on stdin whether the text should first be passed through
    ``mecab.parse``. Newlines are then removed, every "‖" is expanded to 32
    spaces, and the text goes through ``jaconv.normalize``, ``jaconv.z2h`` and
    ``jaconv.hira2hkata`` in that order. The result is printed and returned.

    Any exception raised along the way is printed instead of propagated, in
    which case the function returns ``None``.
    """
    try:
        # To disable mecab, comment out the prompt and the two lines after it.
        answer = input("Convert kanji?: ")
        if answer in ("Y", "y"):
            content = mecab.parse(content)

        # "‖" acts as a line break; change the width here to fit the display.
        line_break = " " * 32
        content = content.replace("\n", "").replace("‖", line_break)

        content = jaconv.normalize(content)
        content = jaconv.z2h(content)
        content = jaconv.hira2hkata(content)

        print("Output content: " + content)
        return content
    except Exception as error:
        print(error)
def binconv(string):
    """Convert ``string`` into a list of integer codes, one per character.

    Each character is passed through ``jaconv.hira2hkata``, ``jaconv.z2h`` and
    ``convmap``. The mapped value is then turned into an integer:

    * a single character whose code point is at most 128 yields that code
      point;
    * anything else is parsed as a hexadecimal string;
    * a value that cannot be interpreted either way yields 255.

    Returns:
        list[int]: one integer per character of ``string``.
    """
    digi = []
    for char in string:
        mapped = convmap(jaconv.z2h(jaconv.hira2hkata(char)))
        try:
            if len(mapped) == 1 and ord(mapped) <= 128:
                digi.append(ord(mapped))
            else:
                digi.append(int(mapped, 16))
        except (TypeError, ValueError):
            # Unmappable value (not a string, or not valid hex): use the
            # 255 placeholder. Interrupts such as KeyboardInterrupt are
            # deliberately no longer swallowed here.
            digi.append(255)
    return digi
Ejemplo n.º 4
0
    def conductMain():
        """Import the selected CSV file into the ``data`` table of database.db.

        The file path is read from ``file1``. For a ``.csv`` file (read as
        Shift-JIS), one row is inserted per CSV row, using the columns
        '得意先略称', '名カナ' and '備考' with their first character stripped.
        The kana column is converted with ``jaconv.hira2hkata`` and then
        widened with ``jaconv.h2z``. For any other extension an error dialog
        is shown. ``newwindow`` is destroyed at the end in both cases.
        """
        file = file1.get()
        _, ext = os.path.splitext(file)

        if ext == '.csv':
            # Read the data to be registered.
            df = pd.read_csv(file, encoding="shift-jis")

            dbname = "database.db"
            c = sqlite3.connect(dbname)
            try:
                cur = c.cursor()
                try:
                    ddl = 'CREATE TABLE data (id INTEGER PRIMARY KEY AUTOINCREMENT, input_name STRING, input_name_kana STRING, content TEXT)'
                    cur.execute(ddl)
                    c.commit()
                except sqlite3.OperationalError:
                    # Presumably the table already exists from an earlier
                    # run; keep using it.
                    pass

                tkinter.messagebox.showinfo(
                    "Start", "データベースを作成します\n完了画面が出るまで何も触らずにお待ちください")
                for k in tqdm(range(len(df))):
                    input_value = df.loc[k, '得意先略称'][1:]
                    input_value_han_kana = df.loc[k, '名カナ'][1:]
                    input_value_kana = jaconv.hira2hkata(input_value_han_kana)
                    input_value_kana = jaconv.h2z(input_value_kana,
                                                  digit=True,
                                                  ascii=True)
                    content = df.loc[k, '備考'][1:]

                    # Bind the values as parameters: CSV content containing
                    # quotes can no longer break or inject into the SQL.
                    c.execute(
                        'INSERT INTO data(input_name, input_name_kana, content) '
                        'VALUES (?, ?, ?)',
                        (str(input_value), str(input_value_kana),
                         str(content)))
                    c.commit()
            finally:
                # Release the database file even if the import fails midway.
                c.close()
            tkinter.messagebox.showinfo("完了", "データベースが作成されました")
        else:
            # Anything other than CSV is rejected.
            tkinter.messagebox.showinfo("不適切なファイル形式です", "csvファイルを指定してください")

        newwindow.destroy()
Ejemplo n.º 5
0
def test_hira2hkata():
    """Check that ``jaconv.hira2hkata`` maps hiragana to half-width katakana."""
    # The expected values must be HALF-width katakana: that is what
    # hira2hkata produces. Characters listed in ``ignore`` stay untouched.
    assert_equal(jaconv.hira2hkata('ともえまみ'), 'ﾄﾓｴﾏﾐ')
    assert_equal(jaconv.hira2hkata('ともえまみ', ignore='み'), 'ﾄﾓｴﾏみ')
    _compare(jaconv.hira2hkata, HIRAGANA, HALF_KANA)
Ejemplo n.º 6
0
def test_hira2hkata():
    """Check that ``jaconv.hira2hkata`` maps hiragana to half-width katakana."""
    # The expected values must be HALF-width katakana: that is what
    # hira2hkata produces. Characters listed in ``ignore`` stay untouched.
    assert_equal(jaconv.hira2hkata('ともえまみ'), 'ﾄﾓｴﾏﾐ')
    assert_equal(jaconv.hira2hkata('ともえまみ', ignore='み'), 'ﾄﾓｴﾏみ')
    _compare(jaconv.hira2hkata, HIRAGANA, HALF_KANA)
Ejemplo n.º 7
0
def main():
    """Build a prefecture / municipality JSON file from an Excel workbook.

    Reads ``input``, ``output`` and ``nation`` from the module-level ``args``.
    The first sheet supplies prefectures and municipalities (columns A:E, first
    row skipped); the second sheet supplies ordinance-designated cities and
    their wards (columns A:C). The prefecture code is the group code
    integer-divided by 10000. The collected structure is written to the output
    file as UTF-8 JSON.

    Exits with status 1 when the input file does not exist.
    """
    input_filename = args.input
    output_filename = args.output
    with_nation = args.nation

    # Validation
    if not os.path.exists(input_filename):
        print(input_filename, 'は存在しません。')
        sys.exit(1)

    # Holds the prefecture and municipality data.
    prefs = {}

    # Add a nationwide entry when requested.
    if with_nation:
        nation_yomi = "ぜんこく"
        prefs['0'] = {
            "code": "0",
            "name": "全国",
            "yomi": nation_yomi,
            "yomi_kana": "ゼンコク",
            # Derived rather than typed in, so the value really is half-width
            # kana like every other 'yomi_kana_han' entry.
            "yomi_kana_han": jaconv.hira2hkata(nation_yomi),
            "cities": {},
            "seirei": {},
        }

    ##################################################
    #
    # Prefectures and municipalities
    # (excluding wards of ordinance-designated cities)
    #
    ##################################################
    # Load the Excel sheet.
    df_city = pd.read_excel(input_filename,
                            usecols='A:E',
                            header=None,
                            skiprows=[0])

    # Rename the columns:
    # group code, prefecture name, municipality name,
    # prefecture name (kana), municipality name (kana)
    df_city = df_city.rename(columns={
        0: 'code',
        1: 'pref_name',
        2: 'city_name',
        3: 'pref_yomi_kana',
        4: 'city_yomi_kana',
    })

    # Process row by row.
    for _, row in df_city.iterrows():
        # Cast to a built-in int so that keys and 'code' strings stay clean
        # (e.g. '1', not '1.0') whatever numeric type pandas produced.
        code = int(row['code'])
        pref_code = code // 10000

        if pd.isna(row['city_name']):
            # Prefecture row: the municipality name cell is empty.
            prefs[pref_code] = {
                'code': str(pref_code),
                'name': row['pref_name'],
                'yomi': jaconv.kata2hira(jaconv.h2z(row['pref_yomi_kana'])),
                'yomi_kana': jaconv.h2z(row['pref_yomi_kana']),
                'yomi_kana_han': row['pref_yomi_kana'],
                'cities': {},
                'seirei': {},
            }
        else:
            # Municipality row.
            prefs[pref_code]['cities'][code] = {
                'code': str(code),
                'name': row['city_name'],
                'yomi': jaconv.kata2hira(jaconv.h2z(row['city_yomi_kana'])),
                'yomi_kana': jaconv.h2z(row['city_yomi_kana']),
                'yomi_kana_han': row['city_yomi_kana'],
            }

    ####################
    #
    # Ordinance-designated cities
    #
    ####################
    # Load the Excel sheet.
    df_seirei = pd.read_excel(input_filename,
                              usecols='A:C',
                              sheet_name=1,
                              header=None)

    # Rename the columns:
    # group code, municipality name, municipality name (hiragana)
    df_seirei = df_seirei.rename(columns={
        0: 'code',
        1: 'city_name',
        2: 'city_yomi',
    })

    # Process row by row. A designated-city row opens a new group; the ward
    # rows that follow it are attached to the most recently seen city.
    seirei_code = 0
    for _, row in df_seirei.iterrows():
        code = int(row['code'])
        pref_code = code // 10000

        if '区' in row['city_name']:
            # Ward of an ordinance-designated city.
            prefs[pref_code]['seirei'][seirei_code][code] = {
                'code': str(code),
                'parent_seirei_code': str(seirei_code),
                'name': row['city_name'],
                'yomi': row['city_yomi'],
                'yomi_kana': jaconv.hira2kata(row['city_yomi']),
                'yomi_kana_han': jaconv.hira2hkata(row['city_yomi']),
            }
        else:
            # Ordinance-designated city itself.
            seirei_code = code
            prefs[pref_code]['seirei'][seirei_code] = {}

    ########################
    #
    # Write the JSON file
    #
    ########################
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(prefs, f, indent=2, ensure_ascii=False)