def test_han_to_zen():
    """Check that ``han_to_zen`` widens half-width kana, digits, letters and symbols.

    Every character whose half-/full-width form matters is written as a
    ``\\uXXXX`` escape.  Written as plain literals, the input and the expected
    value become indistinguishable once the file passes through anything that
    applies Unicode compatibility normalization, and the digit, letter and
    symbol assertions then fail.
    """
    # Half-width katakana -> full-width katakana.
    eq_(u'アイウエオ',
        mojimoji.han_to_zen(u'\uff71\uff72\uff73\uff74\uff75'))
    # Half-width kana followed by a voiced sound mark (U+FF9E).
    eq_(u'ガギグゲゴ',
        mojimoji.han_to_zen(
            u'\uff76\uff9e\uff77\uff9e\uff78\uff9e\uff79\uff9e\uff7a\uff9e'))
    # Half-width kana followed by a semi-voiced sound mark (U+FF9F).
    eq_(u'パピプペポ',
        mojimoji.han_to_zen(
            u'\uff8a\uff9f\uff8b\uff9f\uff8c\uff9f\uff8d\uff9f\uff8e\uff9f'))
    # ASCII digits -> full-width digits (U+FF10..U+FF13).
    eq_(u'\uff10\uff11\uff12\uff13', mojimoji.han_to_zen(u'0123'))
    # ASCII letters -> full-width letters.
    eq_(u'\uff41\uff42\uff43\uff21\uff22\uff23',
        mojimoji.han_to_zen(u'abcABC'))
    # ASCII symbols -> full-width symbols.
    eq_(u'\uff03\uff1f\uff01', mojimoji.han_to_zen(u'#?!'))
    # Hiragana has no half-width form and must pass through unchanged.
    eq_(u'あいうえお', mojimoji.han_to_zen(u'あいうえお'))
def wakachi(self):
    u"""Split ``self.text`` into space-separated content words with MeCab.

    ``self.text`` is width-normalized in place (ASCII letters and digits to
    half-width, kana to full-width) before it is parsed.  Only adjectives,
    adjectival nouns, verbs and nouns are kept, in their base form when the
    dictionary provides one, lower-cased.

    Returns:
        dict with the keys ``_id``, ``screen_name``, ``text`` and ``wakachi``.
        The joined words are also stored on ``self.wakachigaki``.
    """
    dic = config.m_mecab_dic
    mecab = MeCab.Tagger(dic.option)
    # NOTE(review): presumably the usual empty-parse workaround that keeps
    # node.surface valid in the MeCab Python binding -- confirm.
    mecab.parse('')
    emoji_re = re.compile(u'^U00')
    symbol_re = re.compile(u'^[!-~]$')
    kept_pos = ("形容詞", "形容動詞", "動詞", "名詞")

    # Normalize character widths.
    narrowed = mojimoji.zen_to_han(self.text, kana=False, digit=True, ascii=True)
    self.text = narrowed
    self.text = mojimoji.han_to_zen(narrowed, kana=True, digit=False, ascii=False)

    kept_words = []
    node = mecab.parseToNode(self.text.encode('utf-8'))
    while node:
        features = node.feature.split(",")
        if features[dic.pos] in kept_pos:
            # Fall back to the surface form when there is no base-form column
            # or the dictionary left it as "*".
            base = node.surface
            if len(features) > dic.base and features[dic.base] != "*":
                base = features[dic.base]
            base_text = unicode(base)
            # Skip emoji markers and single ASCII symbols/letters/digits.
            is_noise = (emoji_re.match(base_text) is not None
                        or symbol_re.match(base_text) is not None)
            # Skip stop words; everything else is stored lower-cased.
            if not is_noise and base_text not in get_stopwords():
                kept_words.append(base.lower())
        node = node.next

    joined = " ".join(map(str, kept_words))
    if "\n" in joined:
        # Keep only the part before the first line break.
        joined = joined.split("\n", 1)[0].strip()
    self.wakachigaki = joined
    return {
        '_id': self.id,
        'screen_name': self.screen_name,
        'text': self.text,
        'wakachi': joined,
    }
def _check_ng(kwd, ng_list): for ng in ng_list: if ng == "": return False if kwd.find(ng) >= 0: return True if kwd.lower().find(ng) >= 0: return True if kwd.find(mojimoji.han_to_zen(ng.decode('utf-8')).encode('utf-8')) >= 0: return True
def nlp_preprocessing(self, text: str):
    """Apply the configured text rewrites, tokenize, then rewrite tokens.

    Args:
        text: Raw input text.

    Returns:
        list: Tokens from ``self.tokenizer.tokenize`` after the pairs in
        ``self.list_replace_token`` have been applied in order.

    Entries of ``self.list_replace_text`` are processed in order:

    * a ``(pattern, replacement)`` list/tuple is applied with ``re.sub``;
    * the string ``"hantozen"`` converts half-width characters to full-width;
    * the string ``"tokenize"`` re-joins the ``self.spacy_tokenizer`` tokens
      with single spaces;
    * any other string is ignored.
    """
    for val in self.list_replace_text:
        if isinstance(val, (list, tuple)):
            b_rep, a_rep = val
            text = re.sub(b_rep, a_rep, text)
        elif isinstance(val, str):
            if val == "hantozen":
                # Half-width -> full-width conversion.
                text = mojimoji.han_to_zen(text)
            elif val == "tokenize":
                text = " ".join([x.text for x in self.spacy_tokenizer(text)])

    # Tokenize.
    tokens = list(self.tokenizer.tokenize(text))

    # Replace tokens on a plain list.  A NumPy string array has a fixed item
    # width, so assigning a replacement longer than the longest token would
    # silently truncate it, and an empty token list would become a float
    # array that cannot hold strings at all.
    for b_rep, a_rep in self.list_replace_token:
        tokens = [a_rep if tok == b_rep else tok for tok in tokens]
    return tokens
def preprocessing_text(text):
    """Normalize Japanese text before tokenization.

    Steps, in order:

    1. convert half-width characters to full-width;
    2. delete carriage returns, line feeds, full-width spaces (U+3000) and
       half-width spaces;
    3. collapse every run of digits, half- or full-width, into a single "0";
    4. replace each ``string.punctuation`` character except "." and "," with
       a space.

    Args:
        text (str): Raw text.

    Returns:
        str: The normalized text.
    """
    # Unify half-width / full-width forms.
    text = mojimoji.han_to_zen(text)

    # Delete line breaks and both kinds of space.  The full-width space is
    # spelled as an escape so it cannot be mistaken for (or degraded into)
    # an ASCII space.
    text = re.sub('\r', '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\u3000', '', text)
    text = re.sub(' ', '', text)

    # Replace every digit run with "0".  Digits are full-width after
    # han_to_zen (U+FF10..U+FF19), so the class has to cover them.
    text = re.sub(r'[0-9\uff10-\uff19]+', '0', text)

    # Replace symbols other than comma and period with a space.
    for p in string.punctuation:
        if p in ('.', ','):
            continue
        text = text.replace(p, " ")
    return text
def preprocessing_text(self, text):
    """Normalize Japanese text before tokenization.

    Steps, in order:

    1. convert half-width characters to full-width;
    2. delete carriage returns, line feeds, full-width spaces (U+3000) and
       half-width spaces;
    3. collapse every run of digits, half- or full-width, into a single "0";
    4. replace each ``string.punctuation`` character except "." and "," with
       a space.

    Args:
        text (str): Raw text.

    Returns:
        str: The normalized text.
    """
    # Unify half-width / full-width forms.
    text = mojimoji.han_to_zen(text)

    # Delete line breaks and both kinds of space.  The full-width space is
    # spelled as an escape so it cannot be mistaken for (or degraded into)
    # an ASCII space.
    text = re.sub("\r", "", text)
    text = re.sub("\n", "", text)
    text = re.sub("\u3000", "", text)
    text = re.sub(" ", "", text)

    # Replace every digit run with "0".  Digits are full-width after
    # han_to_zen (U+FF10..U+FF19), so the class has to cover them.
    text = re.sub(r"[0-9\uff10-\uff19]+", "0", text)

    # Replace symbols other than comma and period with a space.
    for p in string.punctuation:
        if p in (".", ","):
            continue
        text = text.replace(p, " ")
    return text
def _convert_chars(self, chars):
    '''Normalize an item name.

    Kana are widened, everything else is narrowed and lower-cased, the
    characters listed in ``self.delete_chars`` are dropped, the digits 1-9
    are replaced by 0, and half-width spaces are removed.

    Arguments:
        chars (str): item name

    Returns:
        The converted item name.
    '''
    widened = mojimoji.han_to_zen(chars, digit=False, ascii=False)
    narrowed = mojimoji.zen_to_han(widened, kana=False).lower()
    kept = [c for c in narrowed if c not in self.delete_chars]
    # Map every digit from 1 to 9 onto 0.
    zeroed = ['0' if c in '123456789' else c for c in kept]
    return ''.join(zeroed).replace(' ', '')
def format_line(base_string):
    """Clean one line of social-media style text.

    The line is lower-cased, full-width ASCII is narrowed, half-width kana is
    widened, emoji characters are dropped, a fixed set of punctuation marks
    and brackets is deleted, and finally @mentions, #hashtags and URL-like
    remnants are removed.
    """
    lowered = base_string.lower()
    narrowed = mojimoji.zen_to_han(lowered, digit=False, kana=False)
    widened = mojimoji.han_to_zen(narrowed, digit=False, ascii=False)

    kept = [ch for ch in widened if ch not in emoji.UNICODE_EMOJI]
    cleaned = "".join(kept)

    removed_marks = (
        "'", '"', ';', '.', ',', '-', '!', '?', '=', "(", ")",
        "「", "」", "|", "『", "』",
    )
    for mark in removed_marks:
        cleaned = cleaned.replace(mark, "")

    # Applied after the punctuation pass, so the URL pattern has to cope
    # with links whose ":" or "." have already been stripped.
    for expr in (r"@\w*", r"#(\w+)", r"(http(s)?(:)?//[\w | /]*)"):
        cleaned = re.compile(expr).sub("", cleaned)
    return cleaned
def to_wakati(self, text,
              allow_word_class=(
                  '名詞', '指示詞', '動詞', '形容詞', '判定詞', '助動詞', '副詞',
                  '助詞', '接続詞', '連体詞', '感動詞', '接頭辞', '特殊', '未定義語',
              ),
              remove_stopwords=False, genkei=False):
    """Segment *text* with Juman++ and return space-terminated words.

    Args:
        text: Text to analyse; it is converted to full-width first.
        allow_word_class: Parts of speech (``mrph.hinsi``) to keep.  The
            default is an immutable tuple so the shared default can never be
            mutated between calls; any container supporting ``in`` works.
        remove_stopwords: Skip morphemes whose base form is in
            ``self.stopwords``.
        genkei: Emit the base form (``mrph.genkei``) instead of the surface
            form (``mrph.midasi``).

    Returns:
        str: The kept words, each followed by one space (so a non-empty
        result ends with a trailing space), or "" when nothing is kept.
    """
    text = mojimoji.han_to_zen(text)
    analysed = self.jumanpp.analysis(text)

    words = []
    for mrph in analysed.mrph_list():
        # Available fields: midasi, yomi, genkei, hinsi, bunrui, katuyou1,
        # katuyou2, imis, repname
        if remove_stopwords and (mrph.genkei in self.stopwords):
            continue
        if mrph.hinsi in allow_word_class:
            words.append(mrph.genkei if genkei else mrph.midasi)

    # One join instead of repeated "+=" keeps this linear in output size.
    return ''.join(word + ' ' for word in words)
def sentence_splitter(doc):
    """Split *doc* into sentences and convert each one to full-width.

    Sentences end at "!", "?", "。", "." or a line break.  When the document
    contains Japanese quotation brackets, a fragment that starts with a
    closing bracket or quote is glued back onto the previous sentence.
    A quotation with a sentence end in the middle, such as
    「行く事は行くがじき帰る。来年の夏休みにはきっと帰る」, is still split.

    Args:
        doc (str): Document text.

    Returns:
        A lazy iterable (``filter`` object) of non-empty sentences.
    """
    sent_splitter = nltk.RegexpTokenizer('[^!?。.\n]*[!?。.\n]*')
    sentences = sent_splitter.tokenize(doc)
    sentences = map(lambda s: mojimoji.han_to_zen(s.strip()),
                    filter(None, sentences))

    if '「' not in doc and '」' not in doc:
        return filter(None, sentences)

    # Openers of a fragment that belongs to the previous sentence.  The
    # fragments are full-width by now, so the full-width ")" and "]" are the
    # forms that can occur; the ASCII forms are accepted as well.  The old
    # '\]' literal was an invalid escape that matched a backslash followed
    # by "]"; that two-character form is kept so nothing stops matching.
    closers = (
        '」', '』', '】', '”',
        ')', '\uff09',
        ']', '\uff3d',
        '\\]',
    )

    prev = ''
    n_sentences = []
    for s in sentences:
        if s.startswith(closers):
            prev = prev + s
        else:
            n_sentences.append(prev)
            prev = s
    n_sentences.append(prev)
    return filter(None, map(lambda s: s.strip(), n_sentences))
def update():
    """Update one page record from the JSON request body.

    The request must carry the configured password.  The page is updated via
    ``db_handler.update_page``, the updated record is appended to the topic
    check log as JSON, and the same record is returned as the response.

    Raises:
        InvalidPassword: if the supplied password does not match.
    """
    payload = request.get_json()
    if payload.get("password") != cfg["password"]:
        raise InvalidPassword("The password is not correct")

    field = payload.get
    # Notes are stringified and widened before being stored.
    zen_notes = han_to_zen(str(field("notes")))
    updated = db_handler.update_page(
        url=field("url"),
        is_hidden=field("is_hidden"),
        is_about_covid_19=field("is_about_COVID-19"),
        is_useful=field("is_useful"),
        is_about_false_rumor=field("is_about_false_rumor"),
        is_positive=field("is_positive"),
        icountry=field("new_displayed_country"),
        etopics=field("new_classes"),
        notes=zen_notes,
    )

    log_line = json.dumps(updated, ensure_ascii=False)
    log_handler.extend_topic_check_log([log_line])
    return jsonify(updated)
def make_skills_from_charasheet(sheet: str, sl_as_limit: bool) -> List[Skill]:
    """Parse the skill lines of a character sheet into ``Skill`` objects.

    When the begin/end markers of the skill area are found, only the text
    between them is parsed.  Slashes inside usage limits such as '1/Sn' or
    'SL/Sr' are swapped for a placeholder while the lines are parsed and
    restored on the resulting skills afterwards.
    """
    # If this looks like an entire sheet, keep only the skill area.
    begin_match = skill_area_begin_regex.search(sheet)
    end_match = skill_area_end_regex.search(sheet)
    start = 0 if begin_match is None else begin_match.end()
    stop = len(sheet) if end_match is None else end_match.start()
    sheet = sheet[start:stop]

    # Escape slashes like '1/Sn', 'SL/Sr', and so on.
    before_slash = {str(number) for number in range(20)} | {'sl', 'SL'}
    after_slash = {'sn', 'sr', 'Sn', 'Sr', 'SN', 'SR', 'mp', 'MP', 'Mp'}
    for left in before_slash:
        for right in after_slash:
            sheet = sheet.replace(f'{left}/{right}',
                                  f'{left}{replace_text_slash}{right}')

    # Widen half-width kana only; digits and ASCII stay as they are.
    sheet = mojimoji.han_to_zen(sheet, digit=False, ascii=False)

    # Parse line by line, dropping lines that yield no skill.
    parsed = (make_skill_from_text(row, sl_as_limit)
              for row in sheet.split('\n'))
    skills = [entry for entry in parsed if entry is not None]

    # Put the real slash back.
    for entry in skills:
        if entry.usage_limitation is None:
            continue
        entry.usage_limitation = entry.usage_limitation.replace(
            replace_text_slash, '/')
    return skills
def clensing(text):
    """Clean and normalize free text (tags, widths, spelling variants).

    Steps, in order:

    1. delete ``<...>`` tags, lower-case the text, delete ``[...]`` spans;
    2. normalize widths so digits are half-width and kana/letters are
       full-width;
    3. unify spelling variants and abbreviations;
    4. convert letters and symbols back to half-width;
    5. delete space-delimited words of one or two letters and ``(...)``
       spans.

    Args:
        text (str): Raw text.

    Returns:
        str: The cleaned text.
    """
    text = re.sub(r"<.+?>", "", text)
    text = text.lower()
    text = re.sub(r"\[.+?\]", "", text)
    # Only digits are half-width from here on; kana and letters are
    # full-width.
    text = mojimoji.han_to_zen(
        mojimoji.zen_to_han(text, kana=False, ascii=False), digit=False)

    # Spelling unification, applied strictly in this order (longer variants
    # must be handled before their substrings, e.g. "openes" before "es").
    substitutions = (
        ("best", "ベスト"),
        ("successsqi", "サクセスsqi"),
        ("else", "els"),
        ("openes", "エントリーシート"),
        ("open es", "エントリーシート"),
        ("oes", "エントリーシート"),
        ("es", "エントリーシート"),
        ("se", "システムエンジニア"),
        ("gd", "グループディスカッション"),
        ("hp", "ホームページ"),
        ("ピーアール", "pr"),
        ("pg", "プログラマー"),
        ("gc", "ゲームクリエイター"),
        ("ウェブ", "web"),
        ("コミュニケーション力", "コミュニケーション能力"),
        ("コニニケーション", "コミュニケーション"),
        ("コミュニティーション", "コミュニケーション"),
        ("かんばる", "頑張る"),
        ("がんばる", "頑張る"),
        ("かんばって", "頑張って"),
        # "gpa3.x" would otherwise be tokenized as "gp" + "a3"; add a space.
        ("gpa", "gpa "),
        # "ict" is the more current term; avoids spelling variation.
        ("it", "ict"),
    )
    for before, after in substitutions:
        # The text is full-width at this point, so an ASCII pattern can never
        # match it.  Convert the pattern with the same settings as the text.
        # Replacements may stay ASCII: the final zen_to_han yields half-width
        # letters either way.
        text = text.replace(mojimoji.han_to_zen(before, digit=False), after)

    text = mojimoji.zen_to_han(text, kana=False, digit=False)
    # Delete words of one or two letters, e.g. "I am student." loses "am".
    text = re.sub(r"[ ][a-z]{1,2}[ ]", "", text)
    # Delete anything enclosed in ( ).
    text = re.sub(r"\(.+?\)", "", text)
    return text
def re_def(filepass):
    """Yield ``(speaker, speech)`` pairs from a transcript file.

    A line starting with "○" opens a new speech.  Its text up to the last
    full-width space is the speaker; "君" and "○" are removed from it.  All
    following lines up to the next "○" line are appended to the speech after
    cleaning: printable ASCII, full-width symbols, URLs, HTML tags, line
    breaks and whitespace are deleted, and the remaining listed punctuation
    is replaced by a space.

    Args:
        filepass: Path of a UTF-8 text file; undecodable bytes are ignored.

    Yields:
        tuple: ``(speaker_name, cleaned_speech)``.  One pair is always
        yielded at the end of the file, even if it is empty.

    Raises:
        AttributeError: if a line starting with "○" has no full-width space.
    """
    nameData = ""
    with codecs.open(filepass, 'r', encoding='utf-8', errors='ignore') as f:
        speech = ""
        re_half = re.compile(r'[!-~]')  # half-width symbols, digits, letters
        # Full-width symbols, U+FE30..U+FF20.  Escapes keep the range
        # well-formed: with an ASCII "@" as the upper bound the range is
        # reversed and the pattern does not compile.
        re_full = re.compile(r'[\ufe30-\uff20]')
        # Characters the full-width range above does not catch.
        re_full2 = re.compile(
            r'[、。・’〜:<>_|「」{}【】『』〈〉“”○〇〔〕…――――─◇]')
        re_url = re.compile(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-…]+')
        re_tag = re.compile(r"<[^>]*?>")  # HTML tags
        re_n = re.compile(r'\n')  # line breaks
        re_space = re.compile(r'[\s+]')  # one whitespace character or "+"
        re_num = re.compile(r"[0-9]")
        # Speaker and body are separated by a full-width space (U+3000).
        pattern = "(.*)\u3000(.*)"
        seen_speaker = False
        for line in f:
            if re_num.match(line):
                # Lines starting with a half-width digit: widen the digits.
                line = mojimoji.han_to_zen(line, ascii=False)
            if line.find('○', 0, 10) == 0:
                if seen_speaker:
                    yield nameData, speech
                speech = ""
                sep = re.search(pattern, line)
                nameData = sep.group(1)
                nameData = nameData.replace("君", "")
                nameData = nameData.replace("○", "")
                line = line.replace(sep.group(1), "")
                seen_speaker = True
            line = re_half.sub("", line)
            line = re_full.sub("", line)
            line = re_url.sub("", line)
            line = re_tag.sub("", line)
            line = re_n.sub("", line)
            line = re_space.sub("", line)
            line = re_full2.sub(" ", line)
            speech += line
        yield nameData, speech
def __init__(self):
    """Start the browser driver and load the target month and login settings.

    The target month is taken from the single optional command-line argument;
    without it the month after the current one is used.  It is stored in
    full-width form on ``self.month``.  With more than one argument a usage
    message is printed and the process exits.
    """
    self.driver = SeleniumUtils.getChromedriver(__file__)
    self.verificationErrors = []

    # Target month setting.
    if len(sys.argv) > 2:
        warn_mes = "Sample:\n python3 sbi_monthly.py [write his memo month]"
        print(warn_mes)
        sys.exit()
    if len(sys.argv) == 2:
        han_month = sys.argv[1]
    else:
        # Next calendar month; "% 12" wraps December to 1 instead of 13.
        han_month = str(datetime.datetime.today().month % 12 + 1)
    self.month = mojimoji.han_to_zen(han_month)

    # All member login info.
    config = FileUtils.open_file(__file__, "/config.json")
    # "sbib_login_info":{"uid": "user_id", "upa": "user_pass", "uspa": "user_tra_pass"},
    sbib = config["sbib"]
    self.login_info = sbib["sbib_login_info"]
    self.move_money_info = sbib["move_money_info"]
def extract_txt_from_xls(xls_file, txt_file, split_sent=True, segment=True):
    """Dump the findings of an xls report file as plain text.

    Each report's 'findings' field is written on its own line.  With
    ``split_sent`` the raw text is piped through the external sentence
    splitter scripts; with ``segment`` every non-empty line of ``tmp.sent``
    is segmented with Juman and written to *txt_file* with the words
    separated by spaces.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source.  As laid out here, ``tmp.sent`` and
    ``tmp.raw`` exist only when ``split_sent`` is true, so other flag
    combinations will probably fail on the missing files -- confirm the
    intended structure.
    """
    from pyknp import Juman
    import mojimoji
    juman = Juman()
    json_dict = read_xls(xls_file)
    # Write straight to the output unless a sentence-splitting pass follows.
    tmp_file = txt_file if not split_sent else 'tmp.raw'
    with open(tmp_file, 'w', encoding="utf-8") as fo:
        for report in json_dict['読影所見'].values():
            fo.write('%s\n' % report['findings'])
    if split_sent:
        # Fixed shell pipeline: raw text -> sentence splitter -> split_tnm.py.
        script = '''cat tmp.raw | perl sentence-splitter.pl | python split_tnm.py > tmp.sent'''
        subprocess.Popen(script, shell=True).wait()
    if segment:
        with open('tmp.sent', 'r', encoding='utf-8') as fi, open(txt_file, 'w', encoding='utf-8') as fo:
            for line in fi:
                # Remove all whitespace inside the line; skip empty lines.
                unspace_line = ''.join(line.strip().split())
                if not unspace_line:
                    continue
                # The line is converted to full-width before Juman sees it.
                seg_line = ' '.join([w.midasi for w in juman.analysis(mojimoji.han_to_zen(unspace_line)).mrph_list()])
                fo.write('%s\n' % seg_line)
    # Remove the intermediate files.
    os.remove('tmp.raw')
    os.remove('tmp.sent')
def zenkaku_hankaku(text):
    """Return *text* with kana full-width and digits/letters half-width."""
    # Narrow everything except kana, then widen kana only.
    narrowed = mojimoji.zen_to_han(text, kana=False)
    return mojimoji.han_to_zen(narrowed, digit=False, ascii=False)
def __init__(self, bert_tokenizer: BertTokenizer, jp_tokenizer: JumanTokenizer, args, file_path='train', block_size=512):
    """Build (or load from cache) next-sentence-prediction examples.

    Each non-empty line of *file_path* is paired either with the line that
    follows it or with a randomly drawn line, tokenized, joined into a
    ``[CLS] A [SEP] B [SEP]`` sequence and padded or cut to *block_size*.

    Args:
        bert_tokenizer: Tokenizer used for sub-word tokenization, id
            conversion and adding the special tokens.
        jp_tokenizer: Word tokenizer applied before ``bert_tokenizer``.
        args: Must provide ``overwrite_cache`` and ``nsp_swap_ratio``.
        file_path: Text file to read; must exist.
        block_size: Length of every produced sequence.

    The four result lists (``examples``, ``token_type_ids``,
    ``attention_mask``, ``next_sentence_label``) are pickled next to the
    input file and reused on later runs unless ``args.overwrite_cache`` is
    set.

    NOTE(review): the cache is read with ``pickle.load``; only load cache
    files from a trusted location.
    """
    assert os.path.isfile(file_path)
    directory, filename = os.path.split(file_path)
    cached_features_file = os.path.join(
        directory, 'dialogue_for_nsp' + '_cached_lm_' + str(block_size) + '_' + filename)

    if os.path.exists(cached_features_file) and not args.overwrite_cache:
        logger.info("Loading features from cached file %s", cached_features_file)
        with open(cached_features_file, 'rb') as handle:
            self.examples, \
            self.token_type_ids, \
            self.attention_mask, \
            self.next_sentence_label = pickle.load(handle)
    else:
        # No cached data file: build the dataset from the text file.
        logger.info("Creating features from dataset file at %s", directory)

        self.examples = []  # [CLS] A A A [SEP] B B B [SEP]
        self.token_type_ids = []  # 0 0 0 0 0 1 1 1 1
        self.attention_mask = []  # 1 1 1 1 1 1 1 1 1 0 0 0 0 ...
        self.next_sentence_label = []  # [0, 1] 0: isNext, 1: notNext

        with open(file_path, encoding="utf-8") as f:
            docs = f.readlines()

        exsamples = []
        # Translation table from printable ASCII (0x21..0x7E) to the
        # full-width block starting at U+FF01.
        ZEN = "".join(chr(0xff01 + i) for i in range(94))
        HAN = "".join(chr(0x21 + i) for i in range(94))
        HAN2ZEN = str.maketrans(HAN, ZEN)
        num_doc = len(docs)
        for idx, line in enumerate(docs):
            text = line.rstrip(os.linesep)
            if text == "":
                continue
            try:
                next_text = docs[idx + 1].rstrip(os.linesep)
            except IndexError:
                # The last line has no successor.
                continue
            if next_text == "":
                continue

            if random.random() > args.nsp_swap_ratio:
                # Draw random lines until one is non-empty and is not the
                # true successor.
                while True:
                    rand_idx = random.randrange(0, num_doc)
                    next_text = docs[rand_idx].rstrip(os.linesep)
                    if (not next_text == "") and (rand_idx != idx + 1):
                        break
                nsp_label = 1  # random sequence
            else:
                nsp_label = 0  # continuation sequence

            # Workaround for Juman errors.
            # NOTE(review): as written these two replace calls swap a space
            # for an identical space and do nothing; presumably the target
            # was meant to be a full-width space -- confirm.
            text = text.replace(' ', ' ')
            next_text = next_text.replace(' ', ' ')
            text = mojimoji.han_to_zen(text, kana=False, digit=True, ascii=True)
            next_text = mojimoji.han_to_zen(next_text, kana=False, digit=True, ascii=True)
            text = text.translate(HAN2ZEN)
            next_text = next_text.translate(HAN2ZEN)

            # Tokenize the source text; skip pairs where either side is
            # longer than 4096 UTF-8 bytes.
            if len(text.encode('utf-8')) > 4096 or len(
                    next_text.encode('utf-8')) > 4096:
                continue
            first_tokenized_text = bert_tokenizer.convert_tokens_to_ids(
                bert_tokenizer.tokenize(" ".join(
                    jp_tokenizer.tokenize(text))))
            second_tokenized_text = bert_tokenizer.convert_tokens_to_ids(
                bert_tokenizer.tokenize(" ".join(
                    jp_tokenizer.tokenize(next_text))))
            fst_len = len(first_tokenized_text)
            scd_len = len(second_tokenized_text)

            # for i in range(0, len(tokenized_text)-block_size+1, block_size):  # Truncate in block of block_size
            #     self.examples.append(bert_tokenizer.build_inputs_with_special_tokens(tokenized_text[i:i+block_size]))
            # Note that we are losing the last truncated example here for the sake of simplicity (no padding).
            # If your dataset is small, first you should look for a bigger one :-) and second you
            # can change this behavior by adding (model specific) padding.

            # add special tokens
            # A A A (B B B) -> [CLS] A A A [SEP] (B B B [SEP])
            added_special = bert_tokenizer.build_inputs_with_special_tokens(
                token_ids_0=first_tokenized_text,
                token_ids_1=second_tokenized_text)
            # token type ids: 0 for the first segment plus two special
            # tokens, 1 for the second segment plus one special token.
            type_ids = [0] * (2 + fst_len)
            scd_type = [1] * (1 + scd_len)
            type_ids += scd_type
            attention_mask = [1] * len(added_special)

            # Zero-pad up to the sequence length.
            diff = block_size - len(added_special)
            if diff < 0:
                # Too long: cut the tail, which also drops the final
                # special token.
                added_special = added_special[:diff]
                type_ids = type_ids[:diff]
                attention_mask = attention_mask[:diff]
            else:
                padding = [0] * (block_size - len(added_special))
                padding_1 = [0] * (block_size - len(added_special))
                padding_2 = [0] * (block_size - len(added_special))
                added_special += padding
                type_ids += padding_1
                attention_mask += padding_2

            assert len(added_special) == block_size
            assert len(type_ids) == block_size
            assert len(attention_mask) == block_size

            self.examples.append(added_special)
            self.token_type_ids.append(type_ids)
            self.attention_mask.append(attention_mask)
            self.next_sentence_label.append(nsp_label)

        logger.info("Saving features into cached file %s", cached_features_file)
        with open(cached_features_file, 'wb') as handle:
            pickle.dump([
                self.examples, self.token_type_ids, self.attention_mask,
                self.next_sentence_label
            ], handle, protocol=pickle.HIGHEST_PROTOCOL)
def h2z(filename):
    """Print the contents of *filename* with half-width characters widened.

    Lines are printed as read; no extra line ending is added.
    """
    with open(filename, "r") as source:
        for widened in map(mojimoji.han_to_zen, source):
            print(widened, end="")
def juman_tokenize(line, tagger=False):
    """Widen *line*, turn full-width spaces into ASCII spaces and tokenize.

    The ``tagger`` argument is accepted but not used here.
    """
    widened = mojimoji.han_to_zen(line)
    spaced = widened.replace("\u3000", " ")
    return common_tokenize(spaced)
def normalize(self, src_text):
    """Return *src_text* with full-width kana, half-width ASCII, lower-cased."""
    # Widen half-width kana first, then narrow everything that is not kana.
    kana_widened = mojimoji.han_to_zen(src_text, digit=False, ascii=False)
    narrowed = mojimoji.zen_to_han(kana_widened, kana=False)
    return narrowed.lower()
def check(target_feature, text_list):
    """Predict *target_feature* for every text in *text_list*.

    The texts are converted to half-width, tokenized with MeCab, turned into
    per-word frequency features aligned with the stored training features,
    and fed to the pickled model for *target_feature*.

    Args:
        target_feature: Name used to locate the feature, index and model
            files under ``../model``.  'day' and 'moji' get special
            post-processing.
        text_list: Texts to judge.

    Returns:
        For 'day', a list of 1/2 labels (2 when the prediction exceeds 0.5);
        for 'moji', a list of characters looked up from the predicted class
        index; otherwise the argmax class indices as a NumPy array.

    NOTE(review): the model is loaded with ``pickle``; only use model files
    from a trusted source.
    """
    if target_feature == 'moji':
        col = 'mozi'
    else:
        col = target_feature

    l = [list(map(mojimoji.zen_to_han, text_list))]
    model_path = '../model'
    judge_num = len(l[0])
    # Columns: synthetic tweet ids, the texts, and a dummy target column.
    l.insert(0, [10000 + i for i in range(judge_num)])
    l.append([0 for i in range(judge_num)])
    l_np = np.array(l).T
    columns = ['tweet_id', 'body', col]
    df_judge = pd.DataFrame(data=l_np, columns=columns)

    feature_ds = pd.read_pickle(model_path + '/data/haiti_' + target_feature + '_feature_ds.plk')
    indices = np.loadtxt(model_path + '/data/haiti_' + target_feature + '_indices.csv', delimiter=',')

    m = MeCab.Tagger("")
    # Run morphological analysis on every document.
    length_list = []
    # Kept separate from the ``text_list`` parameter, which holds the inputs.
    freq_frames = []
    for sentence in df_judge["body"]:
        ma = m.parse(sentence)
        # Keep only the surface word of each output line.
        word_list = [text.split("\t")[0] for text in ma.split("\n")]
        # Number of words in the document.
        length_list.append(len(word_list))
        # Word frequencies of the document.
        data = collections.Counter(word_list)
        text_data = pd.DataFrame.from_dict(data, orient='index')
        freq_frames.append(text_data)

    feature = pd.concat([feature_ds] + freq_frames, axis=1)
    # Replace NaN with 0.
    feature = feature.fillna(0)

    # For each document, divide the counts of the selected words by the
    # number of words in that document.
    modi_feature = []
    for index, row in feature.iloc[indices].T[-judge_num:].reset_index(
            drop=True).iterrows():
        modi_feature_temp = row / length_list[index]
        modi_feature.append(modi_feature_temp)
    modi_feature = pd.concat(modi_feature, axis=1).T

    # Join the documents with the features built above.
    df_judge_feature = pd.concat([df_judge, modi_feature], axis=1)
    df_judge_feature = df_judge_feature.drop(["tweet_id", "body"], axis=1)
    judge_x = df_judge_feature.drop(col, axis=1)

    # Close the model file deterministically instead of leaking the handle.
    with open(model_path + '/haiti_' + target_feature + '_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    judge = model.predict(judge_x)

    judge_list = []
    if target_feature == 'day':
        for i in judge:
            if i > 0.5:
                judge_list.append(2)
            else:
                judge_list.append(1)
    else:
        judge_list = np.argmax(judge, axis=1)

    if target_feature == 'moji':
        # Class index -> character: A-Z, then hiragana, then katakana.
        all_moji_list = [
            chr(i) for i in range(ord('A'), ord('Z') + 1)
        ] + [chr(i) for i in range(ord('あ'), ord('ん') + 1)
             ] + [chr(i) for i in range(ord('ア'), ord('ン') + 1)]
        judge_moji_num = copy(judge_list)
        judge_list = []
        for j in range(len(judge_moji_num)):
            judge_moji = all_moji_list[judge_moji_num[j]]
            judge_list.append(mojimoji.han_to_zen(judge_moji, ascii=False))
    return judge_list
def drawItems(self):
    """Draw the item panel and the player's item names.

    A black panel with a white 5-pixel border is drawn, then the second
    field of every entry in ``self.player.item_list`` is rendered in
    full-width form, one row every 40 pixels.
    """
    surface = self.screen
    pygame.draw.rect(surface, BLACK, Rect(200, 20, 180, 215))
    pygame.draw.rect(surface, WHITE, Rect(200, 20, 180, 215), 5)
    for row, item in enumerate(self.player.item_list):
        label = moji.han_to_zen(item[1])
        drawChar(surface, label, 240, 20 + row * 40)
def _fetch_and_unzip(url, zip_path):
    """Download *url* to *zip_path* and extract it into the current directory."""
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"HTTP status : {response.status_code}")
    # Write the downloaded data to disk.
    with open(zip_path, "wb") as file:
        file.write(response.content)
    # Extract the archive.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall("./")


def _split_paren_suffix(name):
    """Split *name* into (text outside the parentheses, the remainder)."""
    # Accept ASCII and full-width parentheses.
    short_name = re.sub("[(\uff08].*[)\uff09]", "", name)
    return short_name, name.replace(short_name, "", 1)


def download():
    """Download the Japan Post zip code data and write normalized UTF-8 CSVs.

    Three archives are downloaded and extracted into the current directory:
    the romanized list (KEN_ALL_ROME.CSV), the main list (KEN_ALL.CSV) and
    the large-office list (JIGYOSYO.CSV).  Two files are written:

    * ``KEN_ALL_UTF8.CSV`` -- one line per logical record (rows whose town
      name continues over several rows are merged), enriched with romanized
      names and with the town name split into a short name and a
      parenthesized extension;
    * ``JIGYOSYO_UTF8.CSV`` -- one line per office, enriched with the kana
      and romanized names looked up from the other two lists.

    Parentheses, digits and spaces are matched in both their ASCII and
    full-width forms.

    Raises:
        Exception: if a download does not return HTTP 200.
    """
    # Download the zip code data (romanized).
    _fetch_and_unzip(
        "https://www.post.japanpost.jp/zipcode/dl/roman/ken_all_rome.zip?190712",
        "ken_all_rome.zip")

    rome_dic = {}
    rome_file_path = "./KEN_ALL_ROME.CSV"
    with codecs.open(rome_file_path, "r", encoding="shift-jis") as rome:
        reader = csv.reader(rome)
        for row in reader:
            zip_code = row[0]
            prefecture_name = row[1]
            city_name = row[2].replace(" ", "").replace("\u3000", "")
            # Drop everything from the first opening parenthesis on.
            town_name = re.sub("^(.*?)([(\uff08].*)?$", r'\1', row[3]).replace(
                " ", "").replace("\u3000", "").replace("以下に掲載がない場合", "")
            prefecture_rome_name = row[4]
            city_rome_name = row[5].replace(" ", "-").lower()
            town_rome_name = re.sub(r'^(.*?)(\(.*)?$', r'\1', row[6]).replace(
                " ", "").replace("IKANIKEISAIGANAIBAAI", "").lower()

            rome_dic[f"{prefecture_name},{city_name},,"] = {
                "prefecture_name": prefecture_rome_name,
                "city_name": city_rome_name,
                "town_name": ""
            }
            rome_dic[f"{prefecture_name},{city_name},{town_name},"] = {
                "prefecture_name": prefecture_rome_name,
                "city_name": city_rome_name,
                "town_name": town_rome_name
            }

    # Download the main zip code data.
    _fetch_and_unzip(
        "https://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip",
        "ken_all.zip")

    # Encoding conversion (shift-jis -> utf-8).
    src_file_path = "./KEN_ALL.CSV"
    dest_file_path = "./KEN_ALL_UTF8.CSV"
    kana_dic = {}
    with codecs.open(src_file_path, "r",
                     encoding="shift-jis") as src, codecs.open(
                         dest_file_path, "w", encoding="utf-8") as dest:
        reader = csv.reader(src)

        area_code = ""
        zip_code = ""
        city_kana_name = ""
        town_kana_name = ""
        prefecture_name = ""
        city_name = ""
        town_name = ""
        town_short_name = ""
        town_ext_name = ""
        city_rome_name = ""
        town_rome_name = ""
        town_duplicate_flag = ""
        town_multi_flag = ""
        zip_code_branch_no = {}
        same_zip_code = False
        building_flag = 0
        exclude_building = False
        building_prefix = ""
        building_kana_prefix = ""

        # The loop works one row behind: the values gathered from the
        # previous row(s) are written out once the current row turns out to
        # start a new record.
        for row in reader:
            # A row continues the previous record when the zip code repeats
            # and the town name so far has an unclosed parenthesis.
            same_zip_code = (
                zip_code.replace("-", "") == row[2]
                and ("(" in town_name or "\uff08" in town_name)
                and not town_name.endswith((")", "\uff09"))
            )

            if not same_zip_code and zip_code:
                if town_name == "以下に掲載がない場合":
                    town_short_name = ""
                    town_ext_name = town_name
                    town_short_kana_name = ""
                    town_ext_kana_name = town_kana_name
                else:
                    town_short_name, town_ext_name = _split_paren_suffix(
                        town_name)
                    town_short_kana_name, town_ext_kana_name = \
                        _split_paren_suffix(town_kana_name)

                if town_ext_name in ("(次のビルを除く)",
                                     "\uff08次のビルを除く\uff09"):
                    # Following rows list the individual buildings.
                    exclude_building = True
                    building_flag = 0
                    town_duplicate_flag = 1
                    building_prefix = town_short_name
                    building_kana_prefix = town_short_kana_name
                elif exclude_building and town_short_name.startswith(
                        building_prefix) and town_ext_name != "":
                    # Building row: move the building part into the
                    # extension.
                    building_flag = 1
                    town_duplicate_flag = 1
                    town_ext_name = f"{town_short_name}{town_ext_name}".replace(
                        building_prefix, "", 1)
                    town_short_name = building_prefix
                    town_ext_kana_name = f"{town_short_kana_name}{town_ext_kana_name}".replace(
                        building_kana_prefix, "", 1)
                    town_short_kana_name = building_kana_prefix
                else:
                    building_flag = 0
                    exclude_building = False

                # Running number of records that share this zip code.
                if zip_code in zip_code_branch_no:
                    zip_code_branch_no[zip_code] += 1
                else:
                    zip_code_branch_no[zip_code] = 1

                key = f"{prefecture_name},{city_name},{town_short_name},"
                if key in rome_dic:
                    city_rome_name = rome_dic[key]["city_name"]
                    town_rome_name = rome_dic[key]["town_name"]
                else:
                    city_rome_name = ""
                    town_rome_name = ""

                kana_dic[f"{prefecture_name},{city_name},,"] = {
                    "city_name": city_kana_name,
                    "town_name": ""
                }
                kana_dic[
                    f"{prefecture_name},{city_name},{town_short_name},"] = {
                        "city_name": city_kana_name,
                        "town_name": town_short_kana_name
                    }

                dest.write(
                    f"{zip_code},{zip_code_branch_no[zip_code]},{area_code},{prefecture_name},{city_name},{city_kana_name},{city_rome_name},{town_short_name},{town_short_kana_name},{town_rome_name},{town_ext_name},{town_ext_kana_name},{town_duplicate_flag},{building_flag}\n"
                )

            area_code = row[0]  # local government code
            # row[1]: old 5-digit zip code
            zip_code = re.sub(r'([0-9]{3})([0-9]{4})', r'\1-\2',
                              row[2])  # zip code
            # row[3]: prefecture name (half-width katakana)
            city_kana_name = mojimoji.han_to_zen(
                row[4])  # city name (half-width katakana)
            town_kana_name = row[
                5] if not same_zip_code else f"{town_kana_name}{row[5]}"  # town name (half-width katakana)
            town_kana_name = mojimoji.han_to_zen(town_kana_name)
            prefecture_name = row[6]  # prefecture name
            city_name = row[7]  # city name
            town_name = row[
                8] if not same_zip_code else f"{town_name}{row[8]}"  # town name
            town_name = town_name.replace("−", "-").replace("〜", "~")
            town_duplicate_flag = row[
                9]  # "1" when one town area has two or more zip codes
            # row[10]: "1" when lots are numbered per sub-area
            # row[11]: "1" when the town area has numbered blocks (chome)
            town_multi_flag = row[
                12]  # "1" when one zip code covers two or more town areas
            # row[13]: update status (0 unchanged, 1 changed, 2 abolished)
            # row[14]: reason for the change

        # Flush the last record, which no following row triggers.
        town_short_name, town_ext_name = _split_paren_suffix(town_name)
        town_short_kana_name, town_ext_kana_name = _split_paren_suffix(
            town_kana_name)

        if exclude_building and town_short_name.startswith(
                building_prefix) and town_ext_name != "":
            building_flag = 1
            town_duplicate_flag = 1
            town_ext_name = f"{town_short_name}{town_ext_name}".replace(
                building_prefix, "", 1)
            town_short_name = building_prefix
            town_ext_kana_name = f"{town_short_kana_name}{town_ext_kana_name}".replace(
                building_kana_prefix, "", 1)
            town_short_kana_name = building_kana_prefix
        else:
            building_flag = 0
            exclude_building = False

        if zip_code in zip_code_branch_no:
            zip_code_branch_no[zip_code] += 1
        else:
            zip_code_branch_no[zip_code] = 1

        key = f"{prefecture_name},{city_name},{town_short_name},"
        if key in rome_dic:
            city_rome_name = rome_dic[key]["city_name"]
            town_rome_name = rome_dic[key]["town_name"]
        else:
            city_rome_name = ""
            town_rome_name = ""

        kana_dic[f"{prefecture_name},{city_name},,"] = {
            "city_name": city_kana_name,
            "town_name": ""
        }
        kana_dic[f"{prefecture_name},{city_name},{town_short_name},"] = {
            "city_name": city_kana_name,
            "town_name": town_short_kana_name
        }

        dest.write(
            f"{zip_code},{zip_code_branch_no[zip_code]},{area_code},{prefecture_name},{city_name},{city_kana_name},{city_rome_name},{town_short_name},{town_short_kana_name},{town_rome_name},{town_ext_name},{town_ext_kana_name},{town_duplicate_flag},{building_flag}\n"
        )

    # Download the zip code data for large offices.
    _fetch_and_unzip(
        "https://www.post.japanpost.jp/zipcode/dl/jigyosyo/zip/jigyosyo.zip",
        "jigyosyo.zip")

    # Encoding conversion (shift-jis -> utf-8).
    src_file_path = "./JIGYOSYO.CSV"
    dest_file_path = "./JIGYOSYO_UTF8.CSV"
    with codecs.open(src_file_path, "r",
                     encoding="cp932") as src, codecs.open(
                         dest_file_path, "w", encoding="utf-8") as dest:
        reader = csv.reader(src)

        area_code = ""
        office_name = ""
        office_kana_name = ""
        zip_code = ""
        prefecture_name = ""
        city_name = ""
        city_kana_name = ""
        city_rome_name = ""
        town_name = ""
        town_kana_name = ""
        town_ext_name = ""
        town_rome_name = ""
        office_flag = 0
        post_office_box_flag = 0
        zip_code_branch_no = {}

        for row in reader:
            area_code = row[0]  # local government code
            office_kana_name = mojimoji.han_to_zen(
                row[1])  # office name (kana)
            office_name = row[2]  # office name (kanji)
            prefecture_name = row[3]  # prefecture name
            city_name = row[4]  # city name
            town_name = row[5]  # town name
            town_kana_name = ""
            town_rome_name = ""
            town_ext_name = row[6]  # sub-area, block and lot numbers
            town_ext_kana_name = ""
            zip_code = re.sub(r'([0-9]{3})([0-9]{4})', r'\1-\2',
                              row[7])  # zip code
            # row[8]: old 5-digit zip code
            # row[9]: handling post office
            # row[10]: "0" large office, "1" post office box
            office_flag = 1 if row[10] == "0" else 0
            post_office_box_flag = 1 if row[10] == "1" else 0
            # row[11]: multiple numbers present
            # row[12]: correction code

            if zip_code in zip_code_branch_no:
                zip_code_branch_no[zip_code] += 1
            else:
                zip_code_branch_no[zip_code] = 1

            # Two town names are spelled differently in the main list.
            key = f"{prefecture_name},{city_name},{town_name},"
            if key == "東京都,千代田区,猿楽町,":
                town_name = "神田猿楽町"
            elif key == "東京都,千代田区,三崎町,":
                town_name = "神田三崎町"

            # Romanized names: exact key, then a spelling variant, then the
            # city-only key.
            key = f"{prefecture_name},{city_name},{town_name},"
            if key in rome_dic:
                city_rome_name = rome_dic[key]["city_name"]
                town_rome_name = rome_dic[key]["town_name"]
            else:
                town_name_exclude_aza = ""
                if "ケ" in town_name:
                    town_name_exclude_aza = town_name.replace("ケ", "ヶ")
                elif "ヶ" in town_name:
                    town_name_exclude_aza = town_name.replace("ヶ", "ケ")
                elif "字" in town_name:
                    town_name_exclude_aza = re.sub(r"(大)?字", "", town_name)
                elif "通" in town_name:
                    # Text of the extension before its first digit.
                    town_name_exclude_aza = re.sub(
                        r"(.*?)[0-9\uff10-\uff19].*", r"\1", town_ext_name)

                key = f"{prefecture_name},{city_name},{town_name_exclude_aza},"
                if town_name_exclude_aza and key in rome_dic:
                    city_rome_name = rome_dic[key]["city_name"]
                    if "通" in town_name:
                        town_rome_name = ""
                    else:
                        town_rome_name = rome_dic[key]["town_name"]
                else:
                    key = f"{prefecture_name},{city_name},,"
                    if key in rome_dic:
                        city_rome_name = rome_dic[key]["city_name"]
                        town_rome_name = rome_dic[key]["town_name"]
                    else:
                        city_rome_name = ""
                        town_rome_name = ""

            # Kana names: same lookup order against the kana dictionary.
            key = f"{prefecture_name},{city_name},{town_name},"
            if key in kana_dic:
                city_kana_name = kana_dic[key]["city_name"]
                town_kana_name = kana_dic[key]["town_name"]
            else:
                town_name_exclude_aza = ""
                if "ケ" in town_name:
                    town_name_exclude_aza = town_name.replace("ケ", "ヶ")
                elif "ヶ" in town_name:
                    town_name_exclude_aza = town_name.replace("ヶ", "ケ")
                elif "字" in town_name:
                    town_name_exclude_aza = re.sub(r"(大)?字", "", town_name)
                elif "通" in town_name:
                    town_name_exclude_aza = re.sub(
                        r"(.*?)[0-9\uff10-\uff19].*", r"\1", town_ext_name)

                key = f"{prefecture_name},{city_name},{town_name_exclude_aza},"
                if town_name_exclude_aza and key in kana_dic:
                    city_kana_name = kana_dic[key]["city_name"]
                    if "通" in town_name:
                        town_ext_kana_name = kana_dic[key]["town_name"]
                    else:
                        town_kana_name = kana_dic[key]["town_name"]
                else:
                    key = f"{prefecture_name},{city_name},,"
                    if key in kana_dic:
                        city_kana_name = kana_dic[key]["city_name"]
                        town_kana_name = kana_dic[key]["town_name"]
                    else:
                        city_kana_name = ""
                        town_kana_name = ""

            dest.write(
                f"{zip_code},{zip_code_branch_no[zip_code]},{area_code},{prefecture_name},{city_name},{city_kana_name},{city_rome_name},{town_name},{town_kana_name},{town_rome_name},{town_ext_name},{town_ext_kana_name},{office_name},{office_kana_name},{office_flag},{post_office_box_flag}\n"
            )
def preprocess(self, text):
    """Replace ``&...;`` entities with a space and widen the text.

    Digits are left as they are; every other half-width character that
    ``han_to_zen`` handles is converted to full-width.
    """
    without_entities = re.sub("&[^;]+;", " ", text)
    return mojimoji.han_to_zen(without_entities, digit=False)
#!/usr/bin/env python3
import sys
import argparse
import mojimoji

# Print the input file line by line.  On lines that start with "zh:", every
# space-separated token after the first is converted to full-width; all other
# lines are printed stripped but otherwise unchanged.

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="input file")
    args = parser.parse_args()

    with open(args.input, "r") as source:
        for raw_line in source:
            line = raw_line.strip()
            if not line.startswith("zh:"):
                print(line)
                continue
            # The first token (the "zh:" marker) is dropped and re-emitted.
            widened = [mojimoji.han_to_zen(token)
                       for token in line.split(" ")[1:]]
            print("zh: %s" % (" ".join(widened)))
def tokenize_jumandic(text):
    """Widen *text*, parse it with the JumanDIC tagger, strip trailing newlines.

    Full-width spaces are turned into ASCII spaces before parsing.
    """
    widened = mojimoji.han_to_zen(text)
    parsed = tagger_jumandic.parse(widened.replace('\u3000', ' '))
    return parsed.rstrip('\n')
def make_stopwords(): u"""コピペ用ストップワードを作成して表示 """ import mojimoji import cnvk stopwords=set() hira=u"あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもらりるれろやゐゆゑよわをんがぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽぁぃぅぇぉゃゅょっゔ" kata=[] for h in hira: kata.append(cnvk.convert(h,cnvk.HIRA2KATA,cnvk.Z_KATA)) kata.append(u"ヴ") hankata=[] for k in kata: hankata.append(mojimoji.zen_to_han(k)) kazu=u"0123456789" stopwords.add(u"10") stopwords.add(u"11") stopwords.add(u"12") stopwords.add(u"13") stopwords.add(u"14") stopwords.add(u"15") stopwords.add(u"16") stopwords.add(u"17") stopwords.add(u"18") stopwords.add(u"19") stopwords.add(u"20") stopwords.add(u"10") stopwords.add(u"11") stopwords.add(u"12") stopwords.add(u"13") stopwords.add(u"14") stopwords.add(u"15") stopwords.add(u"16") stopwords.add(u"17") stopwords.add(u"18") stopwords.add(u"19") stopwords.add(u"20") zenkazu=mojimoji.han_to_zen(kazu) kazukan=u"一二三四五六七八九十百千万億兆" minialpha=u"abcdefghijklmnopqlstuvwxyz" bigalpha=u"ABCDEFGHIJKLMNOPQLSTUVWXYZ" han_minialpha=mojimoji.han_to_zen(minialpha) han_bigalpha=mojimoji.han_to_zen(bigalpha) hiramoji=[u"する",u"なる",u"てる",u"れる",u"やる",u"いる",u"さん",u"なん",u"くん",u"それ",u"こと",\ u"ちゃん",u"ある",u"これ",u"して",u"くれる",u"くださる",u"そう",u"せる",u"した",u"いか",\ u"ので",u"よう",u"てるん",u"もん",u"られる",u"あそこ",u"あたり",u"あちら",u"あっち",u"あと",\ u"あな",u"あなた",u"あれ",u"いくつ",u"いつ",u"いま",u"いろいろ",u"うち",u"おおまか",u"おまえ",u"おれ", u"がい",u"かく",u"かたちの",u"かやの",u"から",u"がら",u"きた",u"こせ",u"ここ",u"こっち",u"こと",u"ごと",\ u"こちら",u"これ",u"これら",u"ごろ",u"さまざま",u"さらい",u"しかた",u"しよう",u"すか",u"ずつ",u"すね",\ u"そう",u"そこ",u"そちら",u"そっち",u"そで",u"それ",u"それぞれ",u"それなり",u"たくさん",u"たち",u"たび",\ u"ため",u"ちゃ",u"てん",u"とおり",u"とき",u"どこ",u"どこか",u"ところ",u"どちら",u"どれ",u"なか",u"なかば",\ u"なに",u"など",u"なん",u"はじめ",u"はず",u"はるか",u"ひと",u"ひとつ",u"ふく",u"ぶり",u"べつ",u"へん",u"べん",\ u"ほう",u"ほか",u"まさ",u"まし",u"まとも",u"まま",u"みたい",u"みつ",u"みなさん",u"みんな",u"もと",u"もの",\ u"もん",u"やつ",u"よう",u"よそ",u"わけ",u"わたし",u"くる",u"すぎる",u"れる",u"いう",u"くださる",u"ちゃう",\ u"つく",u"せる",u"てるん",u"すぎ",u"ところ",u"おれ",u"ぼく",u"わたし",u"てる",u"しまう",u"みる", ] katamoji=[] for h in hiramoji: 
katamoji.append(cnvk.convert(h,cnvk.HIRA2KATA,cnvk.Z_KATA)) han_katamoji=[] for k in katamoji: han_katamoji.append(mojimoji.zen_to_han(k)) kanmoji=["笑","今","気","今日","明日","方","人","俺","私","僕","時","思う","行く","言う","見る","出す","年","月","日","分","秒","週","火","水","木","金","土","国","都",\ "道","府","県","市","区","町","村","各","第","何","的","度","達","誰","者","類","用","別","等","際","系","品","化","所","毎","回","匹","個","席","束","歳","円","毎",\ "前","後","左","右","次","先","春","夏","秋","冬","下記","上記","時間","今回","前回","場合","自分","ヶ所","ヵ所","カ所","箇所","ヶ月","カ月","箇月","名前","本当","確か","時点",\ "様々","結局","半ば","以前","以後","以降","未満","以上","以下","毎日","自体","何人","手段","感じ","同じ","点","君"] h_kigou=cnvk.H_KIGO kigou=[] for h in h_kigou: for x in h: kigou.append(x) kigou.append(u"ω") kigou.append(u'ー') kigou.append(u"д") #参考 内容推測に適したキーワード抽出のための日本語ストップワード(https://www.jstage.jst.go.jp/article/jjske/12/4/12_511/_pdf) kokubu_words=[u"ない",u"高い",u"多い",u"少ない","強い","大きい","小さい","長い","ながい", u"良い",u"よい",u"いい","悪い", u"ある","いる","なる","行く","いく","来る","とる", "見る","みる","言う","いう","得る","過ぎる","すぎる", "する","やる","行なう","行う","おこなう","出来る","できる", "おもう","思う","考える","かんがえる","わかる","見える", "知る","しれる","いえる","示す","述べる","書く","かく","よる", "異なる","違う","ちがう","くらべる", "入れる","出る","でる","入る","はいる", "使う","用いる","もちいる","持つ","もつ","作る","つくる", "なす","起こる","おこる","つく","つける","聞く","よぶ", "かれる","つまり","上","下","次","つぎ", "わが国","自分","人々","人びと","別","他","間","話","例","形","日","家","手","名","身", "そのもの","一つ","あと", #2016/01/24 更に偏在度の高いものと、忘れてたひらがなを追加 "きゃ","きゅ","きょ","しゃ","しゅ","しょ","ちゃ","ちゅ","ちょ","にゃ","にゅ","にょ", "ひゃ","ひゅ","ひょ","みゃ","みゅ","みょ","りゃ","りゅ","りょ","ゎ", "事","目","とこ","中","字","お前","全部","きみ","もらう", ] for h in hira: stopwords.add(h) for k in kata: stopwords.add(k) for h in hankata: stopwords.add(h) for k in kazu: stopwords.add(k) for z in zenkazu: stopwords.add(z) for k in kazukan: stopwords.add(k) for m in minialpha: stopwords.add(m) for b in bigalpha: stopwords.add(b) for h in han_minialpha: stopwords.add(h) for h in han_bigalpha: stopwords.add(h) for h in hiramoji: stopwords.add(h) for k in katamoji: 
stopwords.add(k) for h in han_katamoji: stopwords.add(h) for k in kanmoji: stopwords.add(unicode(k)) for k in kigou: stopwords.add(k) for k in kokubu_words: stopwords.add(unicode(k)) print "set([", for s in sorted(stopwords): print "u\"{0}\",".format(s), print "])"
def zenhan_normalize(texts):
    """Normalize character width with two mojimoji passes.

    First ``mojimoji.zen_to_han`` is applied with ``kana=False``, then
    ``mojimoji.han_to_zen`` is applied to that result with ``digit=False``
    and ``ascii=False``.

    Args:
        texts: The string to normalize.

    Returns:
        The string produced by the second conversion.
    """
    return mojimoji.han_to_zen(
        mojimoji.zen_to_han(texts, kana=False),
        digit=False,
        ascii=False,
    )
import sys
import csv
import mojimoji

# Read the tab-separated file named by the first command-line argument,
# convert column 0 of every row with mojimoji.han_to_zen, pair it with the
# row's column 1, and print the resulting list of [converted, column1] pairs.
args = sys.argv
with open(args[1]) as f:
    ls = list(csv.reader(f, delimiter='\t'))

# All first columns are converted before any second column is read.
result_ls = [mojimoji.han_to_zen(fields[0]) for fields in ls]
result_rows = [
    [converted, fields[1]] for converted, fields in zip(result_ls, ls)
]
print(result_rows)
# update(self): per-call redraw of the menu screen.
#
# What the statements below do, in order:
#   * increment self.count;
#   * draw the command window at Rect(50, 20, 140, 140) (BLACK fill, WHITE
#     border of width 5) and the three command labels, 40 px apart;
#   * call self.drawStaGol() and draw the selection triangle at the row given
#     by self.menu_select_num;
#   * if self.show_statas: set menu_select_num to 0 and draw the status panel
#     (labels at x=220, player values passed through moji.han_to_zen at x=340);
#   * if self.show_items: set menu_select_num to 1, call self.drawItems(), and
#     draw the item cursor / "used item" message when the matching flags are set;
#   * if self.show_magics: set menu_select_num to 2 and draw a magic window
#     that shows at most four entries. When the list has four or more entries
#     and self.magic_arrow_num >= 4, the rows start at index
#     magic_arrow_num - 3. The cursor row is capped at 3
#     (self.magic_arrow_num_correct);
#   * while self.magic_select_tri is set, copy fields [1]..[4] of the selected
#     magic_list entry to self.player.selected_magic, selected_magic_mp,
#     selected_magic_damage and selected_magic_heal (the last three via int()).
#
# NOTE(review): indentation was lost in this copy of the method, so the
# nesting described above (e.g. which checks sit inside the show_items /
# show_magics branches) is inferred from the statement order - confirm against
# the original file before restructuring.
def update(self): self.count += 1 pygame.draw.rect(self.screen, BLACK, Rect(50, 20, 140, 140)) pygame.draw.rect(self.screen, WHITE, Rect(50, 20, 140, 140), 5) commands = ["つよさ", "どうぐ", "じゅもん"] statas_list = [ "LV :", "HP :", "MP :", "ちから :", "みのまもり:", "EX :" ] statas_player_list = [ self.player.lv, self.player.hp, self.player.mp, self.player.attack, self.player.defence, self.player.exp ] for i in range(len(commands)): drawChar(self.screen, commands[i], 80, 20 + i * 40) self.drawStaGol() self.drawTri(60, 39 + self.menu_select_num * 40, self.count) if self.show_statas: self.menu_select_num = 0 pygame.draw.rect(self.screen, BLACK, Rect(200, 20, 255, 250)) pygame.draw.rect(self.screen, WHITE, Rect(200, 20, 255, 250), 5) for i in range(len(statas_list)): drawChar(self.screen, statas_list[i], 220, 20 + i * 40) for j in range(len(statas_player_list)): drawChar(self.screen, moji.han_to_zen(str(statas_player_list[j])), 340, 20 + j * 40) if self.show_items: self.menu_select_num = 1 self.drawItems() if self.item_select_tri: self.drawTri(215, 37 + self.item_select_num * 40, self.count) if self.use_item_anim: drawText( self.screen, "ゆうしゃは " + moji.han_to_zen(str(self.use_item[1])) + "を つかった!", "ゆうしゃの キズが かいふくした!", "", "") if self.show_magics: self.menu_select_num = 2 pygame.draw.rect(self.screen, BLACK, Rect(200, 20, 160, 180)) pygame.draw.rect(self.screen, WHITE, Rect(200, 20, 160, 180), 5) magic_count = len(self.player.magic_list) if magic_count < 4: for i in range(magic_count): drawChar(self.screen, self.player.magic_list[i][1], 242, 28 + 37 * i) else: for i in range(4): if (self.magic_arrow_num < 4): drawChar(self.screen, self.player.magic_list[i][1], 242, 28 + 37 * i) else: drawChar( self.screen, self.player.magic_list[self.magic_arrow_num - 3 + i][1], 242, 28 + 37 * i) if self.magic_select_tri: self.magic_arrow_max_num = len(self.player.magic_list) - 1 if (self.magic_arrow_num > 3): self.magic_arrow_num_correct = 3 else: self.magic_arrow_num_correct = self.magic_arrow_num 
self.drawTri(215, 48 + self.magic_arrow_num_correct * 37, self.count) self.player.selected_magic = self.player.magic_list[ self.magic_arrow_num][1] self.player.selected_magic_mp = int( self.player.magic_list[self.magic_arrow_num][2]) self.player.selected_magic_damage = int( self.player.magic_list[self.magic_arrow_num][3]) self.player.selected_magic_heal = int( self.player.magic_list[self.magic_arrow_num][4]) if self.use_magic_anim: drawText( self.screen, "ゆうしゃは " + moji.han_to_zen(str(self.player.selected_magic)) + "を となえた!", "ゆうしゃの キズが かいふくした!", "", "")
def _normalize_kana(self, text):
    """Return *text* converted by ``han_to_zen`` with digits and ASCII excluded.

    Args:
        text: The string to convert.

    Returns:
        The result of ``han_to_zen(text, digit=False, ascii=False)``.
    """
    normalized = han_to_zen(text, digit=False, ascii=False)
    return normalized
r = mojimoji.zen_to_han(zenAll, digit = False) print unicode_to_utf8(r) # 全角アスキー文字以外の全角文字を全て半角に変換 r = mojimoji.zen_to_han(zenAll, ascii = False) print unicode_to_utf8(r) print '\r\n----- 全角→半角変換 -----\r\n' # 半角文字を全て全角文字に変換 print '----- 半角→全角変換 -----\r\n' print 'target:アイウabc012\r\n' hanAll = u'アイウabc012' # 半角文字を全て全角文字に変換 r = mojimoji.han_to_zen(hanAll) print unicode_to_utf8(r) # 半角カナ以外の半角文字を全て全角に変換 r = mojimoji.han_to_zen(hanAll, kana = False) print unicode_to_utf8(r) # 半角数字以外の半角文字を全て全角に変換 r = mojimoji.han_to_zen(hanAll, digit = False) print unicode_to_utf8(r) # 半角アスキー文字以外の半角文字を全て全角に変換 r = mojimoji.han_to_zen(hanAll, ascii = False) print unicode_to_utf8(r) print '\r\n----- 半角→全角変換 -----\r\n'
# battleAnim(self, screen): per-call drawing of the battle scene.
#
# What the statements below do, in order:
#   * increment self.battle_anim_count and store `screen` on self.screen;
#   * while the counter is below 20, fill the screen BLACK or WHITE depending
#     on `(self.battle_anim_count / 5) % 2`; otherwise fill it BLACK;
#   * draw the monster, show the "appeared" message while 40 < count < 80,
#     draw the status window once count > 80, and at exactly count == 80 set
#     command_selecting, command_select_tri and magic_select_tri to True;
#   * draw the command list, monster list, magic list, item window and their
#     cursors according to the *_selecting / *_select_tri flags (cursor rows
#     are capped at 3 through the *_arrow_num_correct attributes);
#   * while the magic cursor is shown, copy fields [1]..[4] of the selected
#     magic_list entry onto self.player (selected_magic, selected_magic_mp,
#     selected_magic_damage, selected_magic_heal);
#   * show one message per active animation flag (item use, guard, escape,
#     player attack, player magic, monster attack, victory, level-up, defeat).
#     Level-up gains are computed as lv_tables[lv][k] - lv_tables[lv - 1][k].
#
# NOTE(review): indentation was lost in this copy of the method, so the
# nesting described above is inferred from statement order - confirm against
# the original file before restructuring.
# NOTE(review): `(self.battle_anim_count / 5) % 2` only alternates every five
# calls under integer division; if this runs on Python 3, `/` is true division
# and `//` is probably what was intended - confirm the interpreter version.
# NOTE(review): the magic_player_anim branch indexes
# self.player.magic_list[self.player.selected_magic], but in this method
# selected_magic is assigned from magic_list[...][1] rather than from an
# index - verify what selected_magic holds when that branch runs.
def battleAnim(self, screen): self.battle_anim_count += 1 self.screen = screen if (self.battle_anim_count < 20): if ((self.battle_anim_count / 5) % 2): self.screen.fill(BLACK) else: self.screen.fill(WHITE) else: self.screen.fill(BLACK) self.drawMonster() if (self.battle_anim_count > 40 and self.battle_anim_count < 80): self.drawText(str(self.monster.name) + "が あらわれた。", "", "", "") if (self.battle_anim_count > 80): self.drawStatas() if (self.battle_anim_count == 80): self.command_selecting = True self.command_select_tri = True self.magic_select_tri = True if (self.command_selecting): self.drawComand() self.drawMonsterList() if (self.command_select_tri): if (self.command_arrow_num > 3): self.command_arrow_num_correct = 3 else: self.command_arrow_num_correct = self.command_arrow_num self.drawTri(65, 307 + self.command_arrow_num_correct * 37) if (self.magic_selecting): self.drawMagicList() if (self.magic_select_tri): self.magic_arrow_max_num = len(self.player.magic_list) - 1 if (self.magic_arrow_num > 3): self.magic_arrow_num_correct = 3 else: self.magic_arrow_num_correct = self.magic_arrow_num self.drawTri(205, 307 + self.magic_arrow_num_correct * 37) self.player.selected_magic = self.player.magic_list[ self.magic_arrow_num][1] self.player.selected_magic_mp = int( self.player.magic_list[self.magic_arrow_num][2]) self.player.selected_magic_damage = int( self.player.magic_list[self.magic_arrow_num][3]) self.player.selected_magic_heal = int( self.player.magic_list[self.magic_arrow_num][4]) if (self.monster_selecting): self.drawTri(235, 307) if self.item_selecting: pygame.draw.rect(self.screen, BLACK, Rect(210, 245, 180, 215)) pygame.draw.rect(self.screen, WHITE, Rect(210, 245, 180, 215), 5) for i in range(len(self.player.item_list)): drawChar(self.screen, moji.han_to_zen(self.player.item_list[i][1]), 240, 243 + i * 40) if self.item_select_tri: self.drawTri(220, 262 + self.item_select_num * 40) if self.use_item_anim: drawText( self.screen, "ゆうしゃは " + 
moji.han_to_zen(str(self.use_item[1])) + "を つかった!", "ゆうしゃの キズが かいふくした!", "", "") if (self.guard_anim): self.drawText("ゆうしゃは みをまもっている。", "", "", "") if (self.escape_success == 1): self.drawText("ゆうしゃは にげだした!", "", "", "") if (self.escape_success == 2): self.drawText("ゆうしゃは にげだした!", "しかし まわりこまれてしまった!", "", "") if (self.attack_player_anim): self.drawText("ゆうしゃの こうげき!", str(self.damage) + "の ダメージ!", "", "") if (self.magic_player_anim): if self.player.selected_magic_heal == 0: self.drawText( "ゆうしゃは " + self.player.magic_list[self.player.selected_magic][1] + "を となえた!", str(self.monster.name) + "に " + str(self.player.selected_magic_damage) + "の ダメージ!", "", "") else: self.drawText( "ゆうしゃは " + self.player.magic_list[self.player.selected_magic][1] + "を となえた!", "ゆうしゃの きずが かいふくした!", "", "") if (self.attack_monster_anim): self.drawText( str(self.monster.name) + "の こうげき!", "ゆうしゃに " + str(self.damage) + "の ダメージ!", "", "") if (self.you_defeate): self.drawText( str(self.monster.name) + "を やっつけた。", str(self.monster.exp) + "ポイントの けいけんちを かくとく。", str(self.monster.gold) + "ゴールドを てにいれた。", "") if (self.lv_up_anim): self.drawText( "ゆうしゃは レベルが あがった!", "ちからが " + str(self.player.lv_tables[self.player.lv][3] - self.player.lv_tables[self.player.lv - 1][3]) + "ポイント あがった!", "みのまもりが " + str(self.player.lv_tables[self.player.lv][4] - self.player.lv_tables[self.player.lv - 1][4]) + "ポイント あがった!", "さいだいHPが " + str(self.player.lv_tables[self.player.lv][1] - self.player.lv_tables[self.player.lv - 1][1]) + "ポイント あがった!") if (self.lv_up_anim_2): self.drawText( "さいだいMPが " + str(self.player.lv_tables[self.player.lv][2] - self.player.lv_tables[self.player.lv - 1][2]) + "ポイント あがった!", "", "", "") if (self.you_lose): self.drawText("ゆうしゃは しんでしまった!", "しょじきんが はんぶんになった。", "", "")
def process(cls, v: str):
    """Widen *v* with mojimoji, then collapse runs of ASCII spaces.

    Args:
        v: The string to convert.

    Returns:
        ``v`` after ``mojimoji.han_to_zen`` (kana, ASCII and digits all
        enabled) with every run of one or more ASCII spaces replaced by a
        single ASCII space.
    """
    widened = mojimoji.han_to_zen(v, kana=True, ascii=True, digit=True)
    # NOTE(review): if han_to_zen also maps ASCII spaces to U+3000 when
    # ascii=True, this pattern has nothing left to match - confirm which
    # space character was intended here.
    return re.sub(" +", " ", widened)
def cleansing_unity(self, text):
    """Lower-case *text* and unify character widths with mojimoji.

    The text is lower-cased, passed through ``mojimoji.zen_to_han`` with
    ``kana=True``, and finally through ``mojimoji.han_to_zen`` with
    ``digit=False`` and ``ascii=False``.

    Args:
        text: The string to clean.

    Returns:
        The converted string.
    """
    lowered = text.lower()
    narrowed = mojimoji.zen_to_han(lowered, kana=True)
    return mojimoji.han_to_zen(narrowed, digit=False, ascii=False)
def drawText(self, text1, text2, text3, text4):
    """Draw the message window frame and up to four lines of text.

    A WHITE rectangle outline (width 5) is drawn at Rect(70, 280, 500, 180)
    on ``self.screen``. Each text is then passed through ``moji.han_to_zen``
    and drawn with ``self.drawChar`` at x=90, starting at y=290 and moving
    down 33 pixels per line.

    Args:
        text1: First line of the message.
        text2: Second line of the message.
        text3: Third line of the message.
        text4: Fourth line of the message.
    """
    pygame.draw.rect(self.screen, WHITE, Rect(70, 280, 500, 180), 5)
    for row, message in enumerate((text1, text2, text3, text4)):
        self.drawChar(moji.han_to_zen(message), 90, 290 + 33 * row)