def predict_line_types(self, sc_text):
    '''Predict the type of every line of a script text.

    The text is split into lines with ``str.splitlines``, the lines are
    classified by ``predict`` and each predicted class is converted with
    ``str``.

    Args:
        sc_text: Script text to classify, as a single string.

    Returns:
        The predicted line types joined by newline characters, one entry
        per item yielded by ``predict``.
    '''
    # Morphological analyzer, configured from the project settings.
    analyzer = JumanPsc(command=settings.JUMAN_COMMAND,
                        option=settings.JUMAN_OPTION)

    # Prediction model, unpickled from the path given in the settings.
    with open(settings.PSC_PARSE_MODEL_PATH, 'rb') as model_file:
        classifier = pickle.load(model_file)

    # Classify each line of the script.
    predicted = predict(analyzer, classifier, sc_text.splitlines())

    # One stringified class per line of output.
    return '\n'.join(map(str, predicted))
def main():
    '''Command-line entry point: label a script file with the bundled model.

    Usage: ``<prog> IN_FILE OUT_FILE [--normalize]``

    Reads the input and output file names from ``sys.argv``, resolves them
    to absolute paths, loads ``model.pkl`` from the directory of this file
    and passes everything to ``model.give_labels``.

    Raises:
        SystemExit: With a usage message (non-zero exit status) when fewer
            than two file arguments are given.
    '''
    # Validate the argument count up front so a missing argument produces a
    # usage message instead of a bare IndexError.
    if len(sys.argv) < 3:
        prog = os.path.basename(sys.argv[0]) if sys.argv else 'prog'
        sys.exit(f'usage: {prog} IN_FILE OUT_FILE [--normalize]')

    # The optional third argument switches normalization on.
    normalize = len(sys.argv) > 3 and sys.argv[3] == '--normalize'

    # Convert the file name arguments to absolute paths.
    in_file = str(pathlib.Path(sys.argv[1]).resolve())
    out_file = str(pathlib.Path(sys.argv[2]).resolve())

    juman = JumanPsc(command=JUMAN_COMMAND, option=JUMAN_OPTION)

    # NOTE: pickle.load can execute arbitrary code; model.pkl is expected to
    # be the trusted file shipped next to this script.
    model_file = os.path.join(os.path.dirname(__file__), 'model.pkl')
    with open(model_file, 'rb') as f:
        tree = pickle.load(f)

    model.give_labels(juman, tree, in_file, out_file, normalize=normalize)
'''Run morpheme-sequence matching on its own to check the result.
'''
from psc_parse import JumanPsc, MrphMatch, MRPH_MTCH_PTN
# Import only the names that are used; a wildcard import could silently
# shadow the psc_parse names above.
from juman_settings import JUMAN_COMMAND, JUMAN_OPTION

juman = JumanPsc(command=JUMAN_COMMAND, option=JUMAN_OPTION)

# Alternative, longer sample input:
# line = ' 男「ようこそ皆さん、私の名前はジョニー。今日は私のリサイタルショーにおいで下さってありがとうございます。今宵、しばし皆様の時間を拝借して、私の歌をお聞きください。では、まずはこの歌から」'
line = '女「男 イヌ'

# Analyze the sample line and wrap its morpheme list in a matcher.
mrphs = juman.analysis(line).mrph_list()
mrph_match = MrphMatch(mrphs)

# Example of spelling out the match arguments by hand instead of using a
# predefined pattern:
# kwargs = {
#     'ptn': (
#         MrphMatch.match_spaces,
#         MrphMatch.match_noun,
#         MrphMatch.match_left_bracket,
#     ),
#     'idx': 0
# }
kwargs = MRPH_MTCH_PTN['0002']

# Run the match and report whether it matched, the matched string and the
# number of morphemes in the matched range.
result = mrph_match.match(**kwargs)
print(f'マッチ判定: {result.matched}')
print(f'マッチ範囲文字列: "{result.matched_str}"')
print(f'マッチ範囲内形態素数: {result.matched_count}')
from juman_settings import JUMAN_COMMAND, JUMAN_OPTION

# --- Settings -------------------------------------------------------------
input_dir = 'script_samples'     # input directory
output_dir = 'script_features'   # output directory
targets_dir = 'script_targets'   # directory of the teacher label files
empty_output_dir = True          # empty the output directory first?
normalize = False                # normalize the features?

# Anchor the three relative directory names to the directory of this script.
input_dir, output_dir, targets_dir = (
    os.path.join(os.path.dirname(__file__), relative_name)
    for relative_name in (input_dir, output_dir, targets_dir)
)

# --- Build the features ---------------------------------------------------
juman = JumanPsc(command=JUMAN_COMMAND, option=JUMAN_OPTION)
make_features(
    juman,
    input_dir,
    output_dir,
    targets_dir,
    empty_output_dir=empty_output_dir,
    normalize=normalize,
)
'''Try JumanPsc morphological analysis on a single string.
'''
from psc_parse import JumanPsc
from juman_settings import JUMAN_COMMAND, JUMAN_OPTION


def _print_mrph(mrph):
    '''Print one morpheme: its headword first, then its attributes.'''
    print(f'"{mrph.midasi}"')                  # headword
    print(f' genkei: {mrph.genkei}')           # base form
    print(f' hinsi: {mrph.hinsi}')             # part of speech
    print(f' bunrui: {mrph.bunrui}')           # part-of-speech subcategory
    print(f' katuyou2: {mrph.katuyou2}')       # conjugated form


juman = JumanPsc(command=JUMAN_COMMAND, option=JUMAN_OPTION)
s = '今日もいい天気'

# Any failure during analysis or printing is reported on stdout rather than
# raised, as this is only a manual check.
try:
    mrphs = juman.analysis(s)
    for mrph in mrphs:
        _print_mrph(mrph)
except Exception as e:
    print(e)