Example #1
0
def knp_tab(sentence):

    tmp_list = []
    clause_list = []

    echo = subprocess.Popen(
        ['echo', sentence],
        stdout=subprocess.PIPE,
    )

    juman = subprocess.Popen(
        ['/home/kensuke-mi/bin/juman'],
        stdin=echo.stdout,
        stdout=subprocess.PIPE,
    )

    knp = subprocess.Popen(
        ['knp', '-case', '-tab'],
        stdin=juman.stdout,
        stdout=subprocess.PIPE,
    )

    end_of_pipe_tab = knp.stdout

    for line in end_of_pipe_tab:
        #print line
        line_split = line.split(" ")
        tmp_list.append(line_split[0])
        clause_list.append(line)

    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list, clause_num, clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list, clause_num, clause)

    info_dic, struc_dic = Syori(clause_list, clause_num, clause,
                                negative_choice)
    set_dic = make_case_set(info_dic)
    if not negative_choice == "":
        set_dic = add_negative(set_dic, negative_choice)

    print "--------------------------"
    print "About structure information"
    print struc_dic
    print "--------------------------"
    print "About case dictionary information"
    print set_dic
    print "--------------------------"

    make_sentence.sentence_rule(set_dic, struc_dic)

    #print set_dic
    #print info_dic

    return info_dic, struc_dic
Example #2
0
def knp_tab(sentence):

    tmp_list = []
    clause_list = []

    echo = subprocess.Popen(['echo',sentence],
                            stdout=subprocess.PIPE,
                            )


    juman = subprocess.Popen(['juman'], 
                             stdin=echo.stdout,
                             stdout=subprocess.PIPE,
                             )


    knp = subprocess.Popen(['knp','-case','-tab'],
                           stdin = juman.stdout,
                           stdout=subprocess.PIPE,
                           )


    end_of_pipe_tab = knp.stdout

    for line in end_of_pipe_tab:
        #print line
        line_split = line.split(" ")
        tmp_list.append(line_split[0])
        clause_list.append(line)
        
    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list,clause_num,clause)
    
    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list,clause_num,clause)

    info_dic,struc_dic = Syori(clause_list,clause_num,clause,negative_choice)
    set_dic = make_case_set(info_dic)
    if not negative_choice == "":
        set_dic = add_negative(set_dic,negative_choice)
        
    print "--------------------------"
    print "About structure information"
    print struc_dic
    print "--------------------------"
    print "About case dictionary information"
    print set_dic
    print "--------------------------"

    make_sentence.sentence_rule(set_dic,struc_dic)
        
    #print set_dic
    #print info_dic



    return info_dic,struc_dic
Example #3
0
def knp_tab(sentence):
    """Parse *sentence* with ``echo | juman | knp -case -tab`` and analyse it.

    The sentence is first normalised by ``conv_encoding`` and ``check_kuten``.
    Each knp output line is decoded as UTF-8 and checked with
    ``check_knp_error``; the surviving lines feed the clause, negation,
    structure, parallel and modifier helpers, and the result is passed to
    ``make_sentence.sentence``.

    ``frag`` is not defined in this function; it is read from the enclosing
    module scope.  Diagnostics are printed when it equals 1 (presumably a
    debug flag -- confirm against the module).

    Args:
        sentence: Text to analyse.

    Returns:
        ``(out_list, struc_dic)`` on success, or ``('', '')`` as soon as
        ``check_knp_error`` reports ``u'error'`` for an output line.

    Raises:
        SystemExit: If starting the pipeline raises ``TypeError``.
    """
    tmp_list = []
    clause_list = []

    sentence = conv_encoding(sentence)
    sentence = check_kuten(sentence)

    # POSIX pipeline.  The original note says a Windows run instead has to
    # feed cp932-encoded text to juman's stdin and use "knp -tab".
    try:
        echo = subprocess.Popen(
            ['echo', sentence],
            stdout=subprocess.PIPE,
        )
        juman = subprocess.Popen(
            ['juman'],
            stdin=echo.stdout,
            stdout=subprocess.PIPE,
        )
        # The child now owns the read end; drop our copy so the descriptor
        # is not leaked and the upstream process can notice a closed pipe.
        echo.stdout.close()
        knp = subprocess.Popen(
            ['knp', '-case', '-tab'],
            stdin=juman.stdout,
            stdout=subprocess.PIPE,
        )
        juman.stdout.close()
    except TypeError:
        sys.exit(u'KNPの解析エラーが発生しました。解析を終了します。')

    try:
        for raw_line in knp.stdout:
            # knp output is treated as UTF-8 here (cp932 on Windows).
            line = unicode(raw_line, 'utf-8')
            # Abort on a KNP analysis error.
            if check_knp_error(line) == u'error':
                print 'error!'
                return '', ''
            tmp_list.append(line.split(" ")[0])
            clause_list.append(line)
    finally:
        # Runs on the early return too: release the last pipe and reap the
        # children so no descriptors or zombie processes are left behind.
        knp.stdout.close()
        for proc in (echo, juman, knp):
            proc.wait()

    # Hand the collected lines to the individual processing functions.
    clause_num, clause = clause_count(tmp_list)
    # find_negation returns 0 if there is no negation, 1 if there is.
    negative_value = negative.find_negation(clause_list, clause_num, clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list, clause_num, clause)

    # Syntactic structure of the sentence (a hash map, per the original note).
    struc_dic = struc_analyze.structure_analyzer(clause_list, clause_num,
                                                 clause)
    # Extracted sentence information (a list, per the original note).
    out_list = syori.Syori(clause_list, clause_num, clause, negative_choice,
                           frag)
    # Multi-clause or single-clause? Documented as returning "yes" or "no".
    clause_check_result = make_clause.clause_check(out_list)

    # Both answers lead to the same call, so test them together.
    if clause_check_result in (u"yes", u"no"):
        out_list = make_clause.make_clause_set(out_list)
    if frag == 1:
        print u"*" * 40
        print u"result of make clause:", out_list
        print u"*" * 40
    out_list, orig_index_list = parallel.heiretsu(out_list, frag)

    if frag == 1:
        print u"*" * 40
    out_list, orig_index_list = modifier.modi(out_list, orig_index_list, frag)

    if frag == 1:
        print u"*" * 40
        print u"after make modifier list:", out_list
        print u"*" * 40
        print u"structure information is:", struc_dic
        print u"*" * 40

    p_a_dic = predicate_dic.make_p_a_dic(out_list, orig_index_list, frag)

    make_sentence.sentence(struc_dic, p_a_dic, out_list, frag)

    if frag == 1:
        print "*" * 40
        print u"Is clause multi or not?:", clause_check_result

    return out_list, struc_dic
Example #4
0
def knp_tab(sentence):

    tmp_list = []
    clause_list = []

    echo = subprocess.Popen(
        ['echo', sentence],
        stdout=subprocess.PIPE,
    )

    juman = subprocess.Popen(
        ['juman'],
        stdin=echo.stdout,
        stdout=subprocess.PIPE,
    )

    knp = subprocess.Popen(
        ['knp', '-case', '-tab'],
        stdin=juman.stdout,
        stdout=subprocess.PIPE,
    )

    end_of_pipe_tab = knp.stdout

    for line in end_of_pipe_tab:
        #print line
        line_split = line.split(" ")
        tmp_list.append(line_split[0])
        clause_list.append(line)

    #--------------------------------------
    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list, clause_num, clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list, clause_num, clause)

    #文の構文に関する情報。返ってくるのはハッシュマップ
    struc_dic = struc_analyze.structure_analyzer(clause_list, clause_num,
                                                 clause)
    #文の情報を抽出。返ってくるのはリスト
    out_list = syori.Syori(clause_list, clause_num, clause, negative_choice)
    #節が複数節なのか?単節なのか?を判断する。返ってくるのは二値。yes or no
    clause_check_result = clause_check(out_list)

    print "-----------------------------------"
    print "Is clause multi or not?:", clause_check_result
    '''
    set_dic = make_case_set(info_dic)
    if not negative_choice == "":
        set_dic = add_negative(set_dic,negative_choice)
        
    print "--------------------------"
    print "About structure information"
    print struc_dic
    print "--------------------------"
    print "About case dictionary information"
    print set_dic
    print "--------------------------"

    make_sentence.sentence_rule(set_dic,struc_dic)
        
    #print set_dic
    #print info_dic

    '''

    return out_list, struc_dic
Example #5
0
            line_split = line.split(" ")
            tmp_list.append(line_split[0])
            clause_list.append(line)
        


    #--------------------------------------
    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list,clause_num,clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list,clause_num,clause)

    #文の構文に関する情報。返ってくるのはハッシュマップ
    struc_dic = struc_analyze.structure_analyzer(clause_list,clause_num,clause)
    #文の情報を抽出。返ってくるのはリスト
    out_list = syori.Syori(clause_list,clause_num,clause,negative_choice,frag)
    #節が複数節なのか?単節なのか?を判断する。返ってくるのは二値。yes or no
    clause_check_result = make_clause.clause_check(out_list)

    #そもそもif分けする必要はどこにもないので、そのうち修正すること
    if clause_check_result == u"yes":
        out_list = make_clause.make_clause_set(out_list)
    #別に記述せんでもいいが、明文化しておけばわかりやすいじゃん
    if clause_check_result == u"no":
        out_list = make_clause.make_clause_set(out_list)
    if frag == 1:
Example #6
0
def knp_tab(sentence):

    tmp_list = []
    clause_list = []

    #windowsでの使用のために以下のsubprocessはコメントオフ
    """
    echo = subprocess.Popen(['echo',sentence],
                            stdout=subprocess.PIPE,
                            shell=true
                            )


    juman = subprocess.Popen(['juman'],
                             stdin=echo.stdout,
                             stdout=subprocess.PIPE,
                             )


    knp = subprocess.Popen(['knp','-case','-tab'],
                           stdin = juman.stdout,
                           stdout=subprocess.PIPE,
                           )
    """
    #windows用のjuman,knpの入力はcp931でないといけないらしい
    # __xx__
    str = sentence.encode('cp932')

    juman = subprocess.Popen(["juman"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    knp = subprocess.Popen(["knp", "-tab"], stdin=juman.stdout, stdout=subprocess.PIPE, shell=True)
    juman.stdin.write(str)
    juman.stdin.close()
    juman.stdout.close()
    end_of_pipe_tab = knp.stdout

    for line in end_of_pipe_tab:
        #convertion from cp932 to unicode
        line = unicode(line,'cp932')
        line_split = line.split(" ")
        tmp_list.append(line_split[0])
        clause_list.append(line)


    #--------------------------------------
    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list,clause_num,clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list,clause_num,clause)

    #文の構文に関する情報。返ってくるのはハッシュマップ
    struc_dic = struc_analyze.structure_analyzer(clause_list,clause_num,clause)
    #文の情報を抽出。返ってくるのはリスト
    out_list = syori.Syori(clause_list,clause_num,clause,negative_choice,frag)
    #節が複数節なのか?単節なのか?を判断する。返ってくるのは二値。yes or no
    clause_check_result = make_clause.clause_check(out_list)

    #そもそもif分けする必要はどこにもないので、そのうち修正すること
    if clause_check_result == u"yes":
        out_list = make_clause.make_clause_set(out_list)
    #別に記述せんでもいいが、明文化しておけばわかりやすいじゃん
    if clause_check_result == u"no":
        out_list = make_clause.make_clause_set(out_list)
    if frag == 1:
        print u"======================================="
        print u"result of make clause:",out_list
        print u"======================================="
    out_list,orig_index_list = parallel.heiretsu(out_list,frag)

    if frag == 1:
        print u"======================================="
    out_list,orig_index_list = modifier.modi(out_list,orig_index_list,frag)

    if frag == 1:
        print u"======================================="
        print u"after make modifier list:",out_list
        print u"======================================="
        print u"structure information is:",struc_dic
        print u"======================================="
        print u"demo out is:",demo.demo_test(out_list)
        print u"======================================="
    #__xx__
    print demo.demo_test(out_list)

    out_list = parallel.c_heiretsu(out_list,frag)


    if frag == 1:
        print "-----------------------------------"
        print u"Is clause multi or not?:",clause_check_result


    '''
    set_dic = make_case_set(info_dic)
    if not negative_choice == "":
        set_dic = add_negative(set_dic,negative_choice)

    print "--------------------------"
    print "About structure information"
    print struc_dic
    print "--------------------------"
    print "About case dictionary information"
    print set_dic
    print "--------------------------"

    make_sentence.sentence_rule(set_dic,struc_dic)

    #print set_dic
    #print info_dic

    '''

    return out_list,struc_dic
Example #7
0
def knp_tab(sentence):

    tmp_list = []
    clause_list = []

    #windowsでの使用のために以下のsubprocessはコメントオフ
    """
    echo = subprocess.Popen(['echo',sentence],
                            stdout=subprocess.PIPE,
                            shell=true
                            )


    juman = subprocess.Popen(['juman'],
                             stdin=echo.stdout,
                             stdout=subprocess.PIPE,
                             )


    knp = subprocess.Popen(['knp','-case','-tab'],
                           stdin = juman.stdout,
                           stdout=subprocess.PIPE,
                           )
    """
    #windows用のjuman,knpの入力はcp931でないといけないらしい
    # __xx__
    str = sentence.encode('cp932')

    juman = subprocess.Popen(["juman"],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             shell=True)
    knp = subprocess.Popen(["knp", "-tab"],
                           stdin=juman.stdout,
                           stdout=subprocess.PIPE,
                           shell=True)
    juman.stdin.write(str)
    juman.stdin.close()
    juman.stdout.close()
    end_of_pipe_tab = knp.stdout

    for line in end_of_pipe_tab:
        #convertion from cp932 to unicode
        line = unicode(line, 'cp932')
        line_split = line.split(" ")
        tmp_list.append(line_split[0])
        clause_list.append(line)

    #--------------------------------------
    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list, clause_num, clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list, clause_num, clause)

    #文の構文に関する情報。返ってくるのはハッシュマップ
    struc_dic = struc_analyze.structure_analyzer(clause_list, clause_num,
                                                 clause)
    #文の情報を抽出。返ってくるのはリスト
    out_list = syori.Syori(clause_list, clause_num, clause, negative_choice,
                           frag)
    #節が複数節なのか?単節なのか?を判断する。返ってくるのは二値。yes or no
    clause_check_result = make_clause.clause_check(out_list)

    #そもそもif分けする必要はどこにもないので、そのうち修正すること
    if clause_check_result == u"yes":
        out_list = make_clause.make_clause_set(out_list)
    #別に記述せんでもいいが、明文化しておけばわかりやすいじゃん
    if clause_check_result == u"no":
        out_list = make_clause.make_clause_set(out_list)
    if frag == 1:
        print u"======================================="
        print u"result of make clause:", out_list
        print u"======================================="
    out_list, orig_index_list = parallel.heiretsu(out_list, frag)

    if frag == 1:
        print u"======================================="
    out_list, orig_index_list = modifier.modi(out_list, orig_index_list, frag)

    if frag == 1:
        print u"======================================="
        print u"after make modifier list:", out_list
        print u"======================================="
        print u"structure information is:", struc_dic
        print u"======================================="
        print u"demo out is:", demo.demo_test(out_list)
        print u"======================================="
    #__xx__
    print demo.demo_test(out_list)

    out_list = parallel.c_heiretsu(out_list, frag)

    if frag == 1:
        print "-----------------------------------"
        print u"Is clause multi or not?:", clause_check_result
    '''
    set_dic = make_case_set(info_dic)
    if not negative_choice == "":
        set_dic = add_negative(set_dic,negative_choice)

    print "--------------------------"
    print "About structure information"
    print struc_dic
    print "--------------------------"
    print "About case dictionary information"
    print set_dic
    print "--------------------------"

    make_sentence.sentence_rule(set_dic,struc_dic)

    #print set_dic
    #print info_dic

    '''

    return out_list, struc_dic
Example #8
0
def knp_tab(sentence):

    tmp_list = []
    clause_list = []

    echo = subprocess.Popen(['echo',sentence],
                            stdout=subprocess.PIPE,
                            )


    juman = subprocess.Popen(['juman'], 
                             stdin=echo.stdout,
                             stdout=subprocess.PIPE,
                             )


    knp = subprocess.Popen(['knp','-case','-tab'],
                           stdin = juman.stdout,
                           stdout=subprocess.PIPE,
                           )


    end_of_pipe_tab = knp.stdout

    for line in end_of_pipe_tab:
        #print line
        line_split = line.split(" ")
        tmp_list.append(line_split[0])
        clause_list.append(line)
        
    #--------------------------------------
    #ここで各処理関数に情報を投げる
    clause_num, clause = clause_count(tmp_list)
    #returns 0 if not negation, returns 1 if negation
    negative_value = negative.find_negation(clause_list,clause_num,clause)
    
    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list,clause_num,clause)

    #文の構文に関する情報。返ってくるのはハッシュマップ
    struc_dic = struc_analyze.structure_analyzer(clause_list,clause_num,clause)
    #文の情報を抽出。返ってくるのはリスト
    out_list = syori.Syori(clause_list,clause_num,clause,negative_choice)
    #節が複数節なのか?単節なのか?を判断する。返ってくるのは二値。yes or no
    clause_check_result = clause_check(out_list)


    print "-----------------------------------"
    print "Is clause multi or not?:",clause_check_result
    

    '''
    set_dic = make_case_set(info_dic)
    if not negative_choice == "":
        set_dic = add_negative(set_dic,negative_choice)
        
    print "--------------------------"
    print "About structure information"
    print struc_dic
    print "--------------------------"
    print "About case dictionary information"
    print set_dic
    print "--------------------------"

    make_sentence.sentence_rule(set_dic,struc_dic)
        
    #print set_dic
    #print info_dic

    '''

    return out_list,struc_dic
Example #9
0
def knp_tab(sentence):
    """Parse *sentence* with ``echo | juman | knp -case -tab`` and analyse it.

    The sentence is first normalised by ``conv_encoding`` and ``check_kuten``.
    Each knp output line is decoded as UTF-8 and checked with
    ``check_knp_error``; the surviving lines feed the clause, negation,
    structure, parallel and modifier helpers, and the result is passed to
    ``make_sentence.sentence``.

    ``frag`` is not defined in this function; it is read from the enclosing
    module scope.  Diagnostics are printed when it equals 1 (presumably a
    debug flag -- confirm against the module).

    Args:
        sentence: Text to analyse.

    Returns:
        ``(out_list, struc_dic)`` on success, or ``('', '')`` as soon as
        ``check_knp_error`` reports ``u'error'`` for an output line.

    Raises:
        SystemExit: If starting the pipeline raises ``TypeError``.
    """
    tmp_list = []
    clause_list = []

    sentence = conv_encoding(sentence)
    sentence = check_kuten(sentence)

    # POSIX pipeline.  The original note says a Windows run instead has to
    # feed cp932-encoded text to juman's stdin and use "knp -tab".
    try:
        echo = subprocess.Popen(['echo', sentence],
                                stdout=subprocess.PIPE,
                                )
        juman = subprocess.Popen(['juman'],
                                 stdin=echo.stdout,
                                 stdout=subprocess.PIPE,
                                 )
        # The child now owns the read end; drop our copy so the descriptor
        # is not leaked and the upstream process can notice a closed pipe.
        echo.stdout.close()
        knp = subprocess.Popen(['knp', '-case', '-tab'],
                               stdin=juman.stdout,
                               stdout=subprocess.PIPE,
                               )
        juman.stdout.close()
    except TypeError:
        sys.exit(u'KNPの解析エラーが発生しました。解析を終了します。')

    try:
        for raw_line in knp.stdout:
            # knp output is treated as UTF-8 here (cp932 on Windows).
            line = unicode(raw_line, 'utf-8')
            # Abort on a KNP analysis error.
            if check_knp_error(line) == u'error':
                print 'error!'
                return '', ''
            tmp_list.append(line.split(" ")[0])
            clause_list.append(line)
    finally:
        # Runs on the early return too: release the last pipe and reap the
        # children so no descriptors or zombie processes are left behind.
        knp.stdout.close()
        for proc in (echo, juman, knp):
            proc.wait()

    # Hand the collected lines to the individual processing functions.
    clause_num, clause = clause_count(tmp_list)
    # find_negation returns 0 if there is no negation, 1 if there is.
    negative_value = negative.find_negation(clause_list, clause_num, clause)

    if negative_value == 0:
        negative_choice = ""
    else:
        negative_choice = negative.negation(clause_list, clause_num, clause)

    # Syntactic structure of the sentence (a hash map, per the original note).
    struc_dic = struc_analyze.structure_analyzer(clause_list, clause_num,
                                                 clause)
    # Extracted sentence information (a list, per the original note).
    out_list = syori.Syori(clause_list, clause_num, clause, negative_choice,
                           frag)
    # Multi-clause or single-clause? Documented as returning "yes" or "no".
    clause_check_result = make_clause.clause_check(out_list)

    # Both answers lead to the same call, so test them together.
    if clause_check_result in (u"yes", u"no"):
        out_list = make_clause.make_clause_set(out_list)
    if frag == 1:
        print u"*"*40
        print u"result of make clause:", out_list
        print u"*"*40
    out_list, orig_index_list = parallel.heiretsu(out_list, frag)

    if frag == 1:
        print u"*"*40
    out_list, orig_index_list = modifier.modi(out_list, orig_index_list, frag)

    if frag == 1:
        print u"*"*40
        print u"after make modifier list:", out_list
        print u"*"*40
        print u"structure information is:", struc_dic
        print u"*"*40

    p_a_dic = predicate_dic.make_p_a_dic(out_list, orig_index_list, frag)

    make_sentence.sentence(struc_dic, p_a_dic, out_list, frag)

    if frag == 1:
        print "*"*40
        print u"Is clause multi or not?:", clause_check_result

    return out_list, struc_dic