Exemple #1
0
def gen_2gram_dict_separated():
    """Load the raw 2-gram CSV and map each word to its integer count.

    Reads ``2gram_original_no_edit.csv`` through ``csv_wrapper.load_csv``.
    Every row is unpacked as ``(count, word, gram_type)``; the third field is
    ignored.  If a word appears in several rows, the last count wins.

    Returns:
        dict: word -> count converted with ``int``.
    """
    counts_by_word = {}
    rows = csv_wrapper.load_csv('2gram_original_no_edit.csv')
    for raw_count, word, _gram_type in rows:
        counts_by_word[word] = int(raw_count)
    return counts_by_word
Exemple #2
0
def show_good_data_rows():
    """Print a per-finger cost table for the first row of good_data_rows.csv.

    Each row is ``(cost2fin, layer1keys, layer2keys)``.  The keys are grouped
    per finger as ``(home key, layer-1 key, layer-2 key)`` and the 1-gram and
    2-gram cost of every finger is looked up through ``gram_data``.

    NOTE: the loop ends with an unconditional ``break`` after the first
    printed row, so with the initial "previous" values of ``0`` only the
    first row is ever shown and the ``'same'`` branch is not reached.
    """
    home_keys = 'うたんとかの。し'
    rows = csv_wrapper.load_csv('good_data_rows.csv')
    last_1gram_costs = 0
    last_2gram_costs = 0
    for cost2fin, layer1keys, layer2keys in rows[:100]:
        # One tuple per finger: (home key, layer-1 key, layer-2 key).
        fingers = list(zip(home_keys, layer1keys, layer2keys))
        cost1each = list(map(gram_data.get_1gram_cost, fingers))
        cost2each = list(map(gram_data.get_2gram_cost, fingers))
        table = [home_keys, layer1keys, layer2keys, cost1each, cost2each]
        cost1all = sum(cost1each)
        unchanged = (last_1gram_costs == cost1each
                     and last_2gram_costs == cost2each)
        if unchanged:
            print('same')
            continue
        last_1gram_costs, last_2gram_costs = cost1each, cost2each
        print(tabulate(table))
        print(cost2fin, cost1all)
        break
Exemple #3
0
def gen_good_data_rows():
    """Rank layer-2 key arrangements and save the 10000 cheapest ones.

    For every row of ``final_output.csv`` (a tuple literal of layer-1 keys
    plus a score) the characters of ``not_home_keys`` that the row does not
    use are placed on the eight home-key positions in every possible order.
    An arrangement's cost is the sum of
    ``cost_dict[(home key, layer-1 key, layer-2 key)]`` over the positions.
    Only the 10000 lowest-cost arrangements seen so far are kept between
    input rows.

    The result is written to ``good_data_rows.csv`` as rows of
    ``(cost, layer-1 keys, layer-2 keys)`` with both key sequences joined
    into strings, sorted by ascending cost.
    """
    import ast  # local import: only needed to parse the key tuples

    home_keys = 'うたんとかの。し'
    not_home_keys = 'てくなにきはこるがでっょすま:;'
    keep_best = 10000
    cost_dict = gen_cost_dict(home_keys, not_home_keys)
    csv_ = csv_wrapper.load_csv('final_output.csv')

    # One row per permutation of the eight rest-key indices.  Each row is a
    # flattened 8x8 one-hot matrix: entry (position, rest index) is 1 when
    # that rest key is placed on that position.
    petterns_ = [[1 if i == e else 0 for i in ints for e in range(8)]
                 for ints in permutations(range(8))]
    # Transposed to (64, number of permutations) so that a single dot
    # product with the 64 pair costs yields the cost of every permutation.
    np_petterns = np.array(petterns_).T

    good_data_rows = []
    for tuple_str, _score in tqdm(csv_):
        # The field holds a Python tuple literal.  ast.literal_eval replaces
        # the former eval() so that file contents can never execute code.
        second_keys = ast.literal_eval(tuple_str)
        # Set order is arbitrary, but the same list feeds both cost_list and
        # permutations() below, so costs and patterns stay aligned.
        rest_keys = list(set(not_home_keys) - set(second_keys))
        cost_list = [
            cost_dict[(hk, ok, rk)] for hk, ok in zip(home_keys, second_keys)
            for rk in rest_keys
        ]
        costs = np.dot(cost_list, np_petterns)
        good_data_rows.extend(
            (cost, second_keys, rest_key_pattern)
            for cost, rest_key_pattern in zip(costs, permutations(rest_keys)))
        good_data_rows.sort(key=itemgetter(0), reverse=False)
        del good_data_rows[keep_best:]

    # Join the key tuples once, after the last truncation, instead of
    # re-joining the whole kept list on every input row.
    good_data_rows = [
        (cost, ''.join(second_keys), ''.join(rest_key_pattern))
        for cost, second_keys, rest_key_pattern in good_data_rows
    ]
    csv_wrapper.save_csv('good_data_rows.csv', good_data_rows)
Exemple #4
0
def _save_1gram():
    """Convert the raw 1-gram CSV into ``1gram.csv``.

    Rows of ``1gram_original_no_edit.csv`` are read as
    ``(count, word, gram_type)`` and collected into a word -> int count
    mapping.  The ``'〓'`` entry is dropped and the count of ``'、'`` is
    folded into ``'。'`` before ``'、'`` is removed.  The mapping is printed
    and its items are saved.

    Raises:
        KeyError: if ``'〓'``, ``'。'`` or ``'、'`` is missing from the data.
    """
    rows = csv_wrapper.load_csv('1gram_original_no_edit.csv')
    counts = {word: int(count) for count, word, gram_type in rows}
    counts.pop('〓')
    # Treat the comma as a full stop: merge its count, then drop the key.
    counts['。'] += counts.pop('、')
    print(counts)
    csv_wrapper.save_csv('1gram.csv', counts.items())
Exemple #5
0
def calc_hand_of_first_layer():
    """Enumerate hand splits of the first layout row and print them by cost.

    Takes the first row of ``good_data_rows.csv`` and groups its keys into
    one ``(home key, layer-1 key, layer-2 key)`` tuple per finger, then adds
    ``'い'`` as a ninth entry.  Every choice of five entries whose letters
    include ``'い'`` forms the right-hand group; the remaining four entries
    form the left-hand group.  For each split the 1-gram and 2-gram costs of
    both groups are looked up through ``gram_data``.

    Splits are printed in ascending order of the sum of the two groups'
    2-gram costs: a table of every entry with its 1-gram cost (left-hand
    entries first), followed by the split's cost figures.

    Raises:
        ValueError: if ``good_data_rows.csv`` contains no rows.
    """
    home_keys = 'うたんとかの。し'
    good_data_rows = csv_wrapper.load_csv('good_data_rows.csv')
    # Sample layout table kept from the original source for reference:
    #
    # -----  -----  ------  -----  -----  -----  -----  -----
    # う      た      ん       と      か      の      。      し
    # く      な      る       て      き      に      :      は
    # ;      で      っ       ま      こ      が      ょ      す
    # 86814  83163  104495  85031  89982  88830  73629  93979
    # 339    1361   549     1191   1470   2372   82     2257
    # -----  -----  ------  -----  -----  -----  -----  -----
    # 9621 705923
    try:
        _, layer1keys, layer2keys = good_data_rows[0]
    except IndexError:
        raise ValueError('good_data_rows.csv contains no rows') from None

    fingers = list(zip(home_keys, layer1keys, layer2keys))
    fingers.append('い')

    data_rows = []
    for r_fins in combinations(fingers, 5):
        r_letters = flatten(r_fins)
        if 'い' not in r_letters:
            continue
        # Keep the left-hand entries in finger order (a tuple, like r_fins)
        # so the printed columns are the same on every run; a set would
        # iterate in hash order.
        l_fins = tuple(fin for fin in fingers if fin not in r_fins)
        l_letters = flatten(l_fins)
        r_hand_1cost = gram_data.get_1gram_cost(r_letters)
        l_hand_1cost = gram_data.get_1gram_cost(l_letters)
        r_hand_2cost = gram_data.get_2gram_cost(r_letters)
        l_hand_2cost = gram_data.get_2gram_cost(l_letters)
        cross_hand_cost = r_hand_2cost + l_hand_2cost
        data_rows.append((cross_hand_cost, l_hand_2cost, r_hand_2cost,
                          l_hand_1cost, r_hand_1cost, l_fins, r_fins))
    data_rows.sort(key=itemgetter(0), reverse=False)

    for (cross_hand_cost, l_hand_2cost, r_hand_2cost, l_hand_1cost,
         r_hand_1cost, l_fins, r_fins) in data_rows:
        hand_fins = [*l_fins, *r_fins]
        fin_print = [''.join(fin) for fin in hand_fins]
        cost_print = [gram_data.get_1gram_cost(fin) for fin in hand_fins]
        print(tabulate([fin_print, cost_print]))
        print(cross_hand_cost, l_hand_2cost, r_hand_2cost, l_hand_1cost,
              r_hand_1cost)
Exemple #6
0
def _convert_2gram_both_direction():
    """Build direction-independent 2-gram counts and save them to CSV.

    Steps:
      1. Read ``2gram_original_no_edit.csv`` rows as
         ``(count, word, gram_type)`` and add each count to both the word
         and its character-reversed form.
      2. Add a zero entry for every pair of 1-gram keys that is still
         missing.
      3. Fold every key containing ``'、'`` into the same key with ``'、'``
         replaced by ``'。'``, then remove the ``'、'`` keys.
      4. Save the items to ``2gram_no_order.csv``.

    Several sanity-check values are printed along the way.

    Raises:
        KeyError: if one of the keys used by the sanity-check prints
            (``'あい'``, ``'いあ'``, ``'す。'``, ``'す、'``) is absent.
    """
    data = csv_wrapper.load_csv('2gram_original_no_edit.csv')
    two_gram_dict = {}
    both_direction_two_gram_dict = {}
    for count, word, gram_type in data:
        count = int(count)
        two_gram_dict[word] = count
        reversed_word = word[1] + word[0]
        # A doubled character (e.g. 'ああ') is its own reverse and therefore
        # receives the count twice, exactly as before.
        for key in (word, reversed_word):
            both_direction_two_gram_dict[key] = (
                both_direction_two_gram_dict.get(key, 0) + count)

    # Check sum: both directions must report the same combined count.
    print(two_gram_dict['あい'])
    print(two_gram_dict['いあ'])
    print(both_direction_two_gram_dict['あい'])
    print(both_direction_two_gram_dict['いあ'])

    # Put 0 for key pairs that never occur in the raw data.
    keys = load_one_gram().keys()
    for k0, k1 in itertools.product(keys, keys):
        both_direction_two_gram_dict.setdefault(k0 + k1, 0)

    # Merge '、' into '。': add each '、' count to its '。' counterpart and
    # delete the '、' key.
    print(both_direction_two_gram_dict['す。'])
    print(both_direction_two_gram_dict['す、'])
    # Snapshot the affected keys first so the dict is never resized while it
    # is being iterated; get() covers a counterpart key that does not exist
    # yet instead of raising KeyError.
    comma_keys = [word for word in both_direction_two_gram_dict
                  if '、' in word]
    for word in comma_keys:
        replaced_word = word.replace('、', '。')
        both_direction_two_gram_dict[replaced_word] = (
            both_direction_two_gram_dict.get(replaced_word, 0)
            + both_direction_two_gram_dict.pop(word))
    print(both_direction_two_gram_dict['す。'])
    print('す、' in both_direction_two_gram_dict)

    csv_wrapper.save_csv('2gram_no_order.csv',
                         both_direction_two_gram_dict.items())
Exemple #7
0
def load_one_gram():
    """Load ``1gram.csv`` as a dict with integer values.

    The rows returned by ``csv_wrapper.load_csv`` are treated as key/value
    pairs; every value is converted with ``int``.

    Returns:
        dict: key -> int value.
    """
    raw_pairs = dict(csv_wrapper.load_csv('1gram.csv'))
    counts = {}
    for key, raw_value in raw_pairs.items():
        counts[key] = int(raw_value)
    return counts
Exemple #8
0
def load_ngram_dict():
    """Load the 1-gram and 2-gram tables.

    The 1-gram table comes from ``load_one_gram``.  The 2-gram table is read
    from ``2gram_no_order.csv`` as key/value pairs whose values are converted
    with ``int``.

    Returns:
        tuple: ``(one_gram_dict, two_gram_dict)``.
    """
    one_gram_dict = load_one_gram()
    raw_pairs = dict(csv_wrapper.load_csv('2gram_no_order.csv'))
    two_gram_dict = {}
    for key, raw_value in raw_pairs.items():
        two_gram_dict[key] = int(raw_value)
    return one_gram_dict, two_gram_dict