Ejemplo n.º 1
0
            inside_code = 32 
        elif (inside_code >= 65281 and inside_code <= 65374): #全角字符(除空格)根据关系转化
            inside_code -= 65248

        rstring += unichr(inside_code)  
    return rstring



# Module-level bootstrap: import the sofa bindings, load the client
# configuration and create one client agent per enabled service.
try:
    import sofa
except Exception:
    # Previously a bare ``except:``, which also trapped SystemExit and
    # KeyboardInterrupt. Ordinary import-time failures still end up here.
    sys.stderr.write('Error: Please excute the following command first:\n')
    sys.stderr.write('export SOFA_CONFIG=./config/drpc_client.xml\n')
    sys.exit(1)

# NOTE(review): the second argument looks like the alias under which the
# interface becomes available (``S`` is used right below) -- confirm against
# the sofa documentation.
sofa.use('drpc.ver_1_0_0', 'S')
sofa.use('nlpc.ver_1_0_0', 'nlpc')
conf = sofa.Config()
conf.load('./config/drpc_client.xml')  # local


# open_flag[i] == 1 switches the i-th service on. A service that is switched
# off gets ``None`` so that all three agent names are always bound.
if open_flag[0] == 1:
    wordrank_agent = S.ClientAgent(conf['sofa.service.nlpc_wordrank_208'])  # local
else:
    wordrank_agent = None
if open_flag[1] == 1:
    wordpos_agent = S.ClientAgent(conf['sofa.service.nlpc_wordpos_202'])  # local
else:
    wordpos_agent = None
if open_flag[2] == 1:
    depparser_agent = S.ClientAgent(conf['sofa.service.nlpc_depparser_query_107'])  # local
else:
    # Mirrors the two branches above; without it the name stayed undefined
    # whenever the depparser service was disabled.
    depparser_agent = None
Ejemplo n.º 2
0
 def __init__(self, conf):
     """Initialise the instance by calling ``sofa.use`` for OnlineDeployService.

     Args:
         conf: accepted by the constructor; the statement visible here does
             not read it.
     """
     # NOTE(review): presumably makes the 'OnlineDeployService.ver_1_0_0'
     # interface available to this process -- confirm against the sofa docs.
     sofa.use('OnlineDeployService.ver_1_0_0')
Ejemplo n.º 3
0
 def __init__(self, conf):
     """Initialise the instance by calling ``sofa.use`` for OnlineDeployService.

     Args:
         conf: accepted by the constructor; the statement visible here does
             not read it.
     """
     # NOTE(review): presumably makes the 'OnlineDeployService.ver_1_0_0'
     # interface available to this process -- confirm against the sofa docs.
     sofa.use('OnlineDeployService.ver_1_0_0')
Ejemplo n.º 4
0
def _write_term_section(title, term_count, term_positions, text_buf):
    """Write a banner line followed by the space-separated terms of one section.

    Args:
        title: banner text, written on its own line before the terms.
        term_count: how many entries of ``term_positions`` to emit.
        term_positions: indexable sequence of packed term descriptors; each
            one is decoded with GET_TERM_POS / GET_TERM_LEN.
        text_buf: buffer the decoded (offset, length) pairs are sliced from.
    """
    stdout.write(title + '\n')
    for idx in range(term_count):
        packed = term_positions[idx]
        start = GET_TERM_POS(packed)
        length = GET_TERM_LEN(packed)
        stdout.write('%s ' % text_buf[start:start + length])
    stdout.write('\n')


def main():
    """Send every stdin line to the wordseg service and print the result sections.

    With no command-line argument every section is printed. With exactly one
    argument (``basic``, ``segment``, ``phrase``, ``new``, ``human`` or
    ``book``) only the matching section is printed. Lines are read until
    ``readline`` returns an empty string; ``No result`` is written for a line
    whose request produced no data.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'wordseg')

    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')

    wordseg_agent = S.ClientAgent(conf['sofa.service.nlpc_wordseg_3016'])

    # argv does not change while the loop runs, so decide the selection once.
    show_all = len(argv) == 1
    selected = argv[1] if len(argv) == 2 else None

    while True:
        line = stdin.readline()
        if len(line) <= 0:
            return
        # The query is re-encoded from UTF-8 to GBK before it is sent.
        line = line.decode('utf-8').encode('gbk')

        m_input = wordseg.wordseg_input()
        m_input.query = str(line)
        m_input.lang_id = int(0)
        m_input.lang_para = int(0)

        input_data = sofa.serialize(m_input)

        # Start from an empty result for every line. Without this, five failed
        # attempts raised NameError on the first line and silently re-printed
        # the previous line's result on later ones.
        output_data = ''
        for _ in range(5):
            try:
                _, output_data = wordseg_agent.call_method(input_data)
                break
            except Exception:
                # Best-effort retry; the failure surfaces as 'No result'.
                continue
        if len(output_data) == 0:
            stdout.write('No result' + '\n')
            continue

        m_output = sofa.deserialize(output_data,
                                    type(wordseg.wordseg_output()))
        m_output = m_output.scw_out

        if show_all or selected == 'basic':
            _write_term_section(
                '=========== Basic Word Sep Result =============',
                m_output.wsbtermcount, m_output.wsbtermpos,
                m_output.wordsepbuf)

        if show_all or selected == 'segment':
            _write_term_section(
                '============  Word Phrase Result  ==============',
                m_output.wpbtermcount, m_output.wpbtermpos,
                m_output.wpcompbuf)

        if show_all or selected == 'phrase':
            _write_term_section(
                '============  Sub Phrase Result   ==============',
                m_output.spbtermcount, m_output.spbtermpos,
                m_output.subphrbuf)

        if show_all or selected == 'new':
            # New-word data lives on a nested object, unlike the others.
            pnewword = m_output.pnewword
            _write_term_section(
                '============  New Word  Result  ==============',
                pnewword.newwordbtermcount, pnewword.newwordbtermpos,
                pnewword.newwordbuf)

        if show_all or selected == 'human':
            _write_term_section(
                '===========  Human Name Result   ==============',
                m_output.namebtermcount, m_output.namebtermpos,
                m_output.namebuf)

        if show_all or selected == 'book':
            _write_term_section(
                '===============  book names   =================',
                m_output.bnbtermcount, m_output.bnbtermpos,
                m_output.booknamebuf)
Ejemplo n.º 5
0
        if inside_code == 12288:  #全角空格直接转换
            inside_code = 32
        elif (inside_code >= 65281 and inside_code <= 65374):  #全角字符(除空格)根据关系转化
            inside_code -= 65248

        rstring += unichr(inside_code)
    return rstring


# Module-level bootstrap: import the sofa bindings, load the client
# configuration and create one client agent per enabled service.
try:
    import sofa
except Exception:
    # Previously a bare ``except:``, which also trapped SystemExit and
    # KeyboardInterrupt. Ordinary import-time failures still end up here.
    sys.stderr.write('Error: Please excute the following command first:\n')
    sys.stderr.write('export SOFA_CONFIG=./config/drpc_client.xml\n')
    sys.exit(1)

# NOTE(review): the second argument looks like the alias under which the
# interface becomes available (``S`` is used right below) -- confirm against
# the sofa documentation.
sofa.use('drpc.ver_1_0_0', 'S')
sofa.use('nlpc.ver_1_0_0', 'nlpc')
conf = sofa.Config()
conf.load('./config/drpc_client.xml')  # local

# open_flag[i] == 1 switches the i-th service on; a service that is switched
# off gets ``None`` instead of a client agent.
if open_flag[0] == 1:
    wordrank_agent = S.ClientAgent(
        conf['sofa.service.nlpc_wordrank_208'])  # local
else:
    wordrank_agent = None
if open_flag[1] == 1:
    wordpos_agent = S.ClientAgent(
        conf['sofa.service.nlpc_wordpos_202'])  # local
else:
    wordpos_agent = None
if open_flag[2] == 1:
# Number of sentences sent to the service in one request.
_BATCH_SIZE = 1000
# How many times one request is attempted before giving up.
_MAX_ATTEMPTS = 5


def _field_or_placeholder(value):
    """Return *value*, or ``'_'`` when it is None, empty or only whitespace."""
    if value is None or not value.strip():
        return '_'
    return value


def _parse_and_print(query_agent, sentences):
    """Send one batch of sentences to the parser service and print the parse.

    Each term is written as one tab-separated line (index, word, lemma,
    cpostag, postag, ner, feat, head, deprel) and each sentence is followed
    by an empty line. ``No result`` is written when no data came back.

    Args:
        query_agent: client agent whose ``call_method`` performs the request.
        sentences: list of sentence strings making up the batch.

    Returns:
        True when a parse was printed, False when the service gave no data.
    """
    m_input = nlpc.depparser_uni_input()
    m_input.grain_size = 1
    m_input.sentence_segmented = False
    m_input.sentences = sentences

    input_data = sofa.serialize(m_input)

    # Start from an empty result so that exhausting every attempt is reported
    # as 'No result' rather than hitting an unbound (or stale) variable.
    output_data = ''
    for _ in range(_MAX_ATTEMPTS):
        try:
            _, output_data = query_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort retry; the failure surfaces as 'No result'.
            continue

    if len(output_data) == 0:
        sys.stdout.write('No result' + '\n')
        return False

    m_output = sofa.deserialize(output_data,
                                type(nlpc.depparser_uni_output()))

    # len() plus integer indexing is the only access pattern the original
    # code relied on for these service objects, so it is kept as is.
    dep_sentences = m_output.dep_sentences
    for sent_idx in range(len(dep_sentences)):
        dep_terms = dep_sentences[sent_idx].dep_terms
        for term_idx in range(len(dep_terms)):
            term = dep_terms[term_idx]
            columns = (
                str(term_idx),
                term.word,
                _field_or_placeholder(term.lemma),
                _field_or_placeholder(term.cpostag),
                _field_or_placeholder(term.postag),
                _field_or_placeholder(term.ner),
                _field_or_placeholder(term.feat),
                str(term.head),
                _field_or_placeholder(term.deprel),
            )
            sys.stdout.write('\t'.join(columns) + '\n')
        sys.stdout.write('\n')
    return True


def main():
    """Dependency-parse the sentences read from stdin, one sentence per line.

    Lines are stripped of surrounding spaces, tabs and line breaks, collected
    into batches of ``_BATCH_SIZE`` and sent to the parser service; whatever
    is left when the input ends is sent as a final, smaller batch.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'nlpc')

    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')

    # Alternative endpoint: 'sofa.service.nlpc_depparser_uni_query_107'
    query_agent = S.ClientAgent(conf['sofa.service.nlpc_depparser_uni_web_107'])

    in_sentences = []

    while True:
        line = sys.stdin.readline()
        if len(line) <= 0:
            break

        in_sentences.append(str(line.strip(' \t\n\r')))
        if len(in_sentences) < _BATCH_SIZE:
            continue

        _parse_and_print(query_agent, in_sentences)
        # Always start a fresh batch, even after a failed request; otherwise
        # the failed batch would be re-sent, one sentence larger, for every
        # further input line.
        in_sentences = []

    if len(in_sentences) > 0:
        _parse_and_print(query_agent, in_sentences)
Ejemplo n.º 7
0
# Number of sentences sent to the service in one request.
_BATCH_SIZE = 1000
# How many times one request is attempted before giving up.
_MAX_ATTEMPTS = 5


def _field_or_placeholder(value):
    """Return *value*, or ``'_'`` when it is None, empty or only whitespace."""
    if value is None or not value.strip():
        return '_'
    return value


def _parse_and_print(query_agent, sentences):
    """Send one batch of sentences to the parser service and print the parse.

    Each term is written as one tab-separated line (index, word, lemma,
    cpostag, postag, ner, feat, head, deprel) and each sentence is followed
    by an empty line. ``No result`` is written when no data came back.

    Args:
        query_agent: client agent whose ``call_method`` performs the request.
        sentences: list of sentence strings making up the batch.

    Returns:
        True when a parse was printed, False when the service gave no data.
    """
    m_input = nlpc.depparser_uni_input()
    m_input.grain_size = 1
    m_input.sentence_segmented = False
    m_input.sentences = sentences

    input_data = sofa.serialize(m_input)

    # Start from an empty result so that exhausting every attempt is reported
    # as 'No result' rather than hitting an unbound (or stale) variable.
    output_data = ''
    for _ in range(_MAX_ATTEMPTS):
        try:
            _, output_data = query_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort retry; the failure surfaces as 'No result'.
            continue

    if len(output_data) == 0:
        sys.stdout.write('No result' + '\n')
        return False

    m_output = sofa.deserialize(output_data,
                                type(nlpc.depparser_uni_output()))

    # len() plus integer indexing is the only access pattern the original
    # code relied on for these service objects, so it is kept as is.
    dep_sentences = m_output.dep_sentences
    for sent_idx in range(len(dep_sentences)):
        dep_terms = dep_sentences[sent_idx].dep_terms
        for term_idx in range(len(dep_terms)):
            term = dep_terms[term_idx]
            columns = (
                str(term_idx),
                term.word,
                _field_or_placeholder(term.lemma),
                _field_or_placeholder(term.cpostag),
                _field_or_placeholder(term.postag),
                _field_or_placeholder(term.ner),
                _field_or_placeholder(term.feat),
                str(term.head),
                _field_or_placeholder(term.deprel),
            )
            sys.stdout.write('\t'.join(columns) + '\n')
        sys.stdout.write('\n')
    return True


def main():
    """Dependency-parse the sentences read from stdin, one sentence per line.

    Lines are stripped of surrounding spaces, tabs and line breaks, collected
    into batches of ``_BATCH_SIZE`` and sent to the parser service; whatever
    is left when the input ends is sent as a final, smaller batch.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'nlpc')

    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')

    # Alternative endpoint: 'sofa.service.nlpc_depparser_uni_query_107'
    query_agent = S.ClientAgent(
        conf['sofa.service.nlpc_depparser_uni_web_107'])

    in_sentences = []

    while True:
        line = sys.stdin.readline()
        if len(line) <= 0:
            break

        in_sentences.append(str(line.strip(' \t\n\r')))
        if len(in_sentences) < _BATCH_SIZE:
            continue

        _parse_and_print(query_agent, in_sentences)
        # Always start a fresh batch, even after a failed request; otherwise
        # the failed batch would be re-sent, one sentence larger, for every
        # further input line.
        in_sentences = []

    if len(in_sentences) > 0:
        _parse_and_print(query_agent, in_sentences)