Beispiel #1
0
def word_depparser(sentence, is_segmented=False):
    """Run the dependency-parser service on ``sentence``.

    Args:
        sentence: Text to parse; it is passed through ``utf8_gbk`` before
            being placed on the request.
        is_segmented: Forwarded as ``sentence_segmented`` on the request.

    Returns:
        A list of ``(word, deprel)`` tuples, one per returned item, where a
        blank ``deprel`` is replaced by ``'_'``. An empty list is returned
        when no agent is configured or the service yields no data.
    """
    if not depparser_agent:
        return []

    ## post request
    m_input = nlpc.parse_prep_input()
    m_input.sentence = str(utf8_gbk(sentence))
    m_input.grain_size = 1
    m_input.sentence_segmented = is_segmented
    input_data = sofa.serialize(m_input)

    # Start from an empty payload so that five failed attempts fall through
    # to the "No result" branch instead of raising NameError below.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = depparser_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort remote call: any failure simply triggers a retry.
            continue
    if not output_data:
        sys.stderr.write('No result' + sentence + '\n')
        return []

    ## get results
    m_output = sofa.deserialize(output_data, type(nlpc.depparser_output()))
    tokens = m_output.items
    depparser_list = []
    for i in range(len(tokens)):
        if len(tokens[i].deprel.strip()) == 0:
            tokens[i].deprel = '_'
        depparser_list.append((gbk_utf8(tokens[i].word), tokens[i].deprel))
    return depparser_list
def word_depparser(sentence, is_segmented=False):
    """Return ``(word, deprel)`` pairs from the dependency-parser service.

    Args:
        sentence: Text to parse; converted with ``utf8_gbk`` for the request.
        is_segmented: Value stored in the request's ``sentence_segmented``.

    Returns:
        One ``(word, deprel)`` tuple per item in the response, with ``'_'``
        substituted for a blank relation. ``[]`` when ``depparser_agent`` is
        falsy or no response data was obtained.
    """
    if not depparser_agent:
        return []

    ## post request
    request = nlpc.parse_prep_input()
    request.sentence = str(utf8_gbk(sentence))
    request.grain_size = 1
    request.sentence_segmented = is_segmented
    input_data = sofa.serialize(request)

    # Pre-bind the payload: without this, exhausting all retries left
    # ``output_data`` undefined and the length check raised NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = depparser_agent.call_method(input_data)
        except Exception:
            continue  # retry on any failure
        break
    if not output_data:
        sys.stderr.write('No result' + sentence + '\n')
        return []

    ## get results
    response = sofa.deserialize(output_data, type(nlpc.depparser_output()))
    tokens = response.items
    depparser_list = []
    for idx in range(len(tokens)):
        if not tokens[idx].deprel.strip():
            tokens[idx].deprel = '_'
        word = gbk_utf8(tokens[idx].word)
        depparser_list.append((word, tokens[idx].deprel))
    return depparser_list
def word_ner(sentence):
    """Query the word-NER service for ``sentence``.

    Args:
        sentence: Text to tag; converted with ``utf8_gbk`` for the request.

    Returns:
        A list of ``(term, type_as_str, short_label)`` tuples, one per tag in
        the response, where ``short_label`` is ``trans_id_short[tag.type]``.
        ``[]`` when ``wordner_agent`` is falsy or no data was obtained.

    Raises:
        Whatever ``trans_id_short[...]`` raises for a tag type it does not
        contain (lookup is not guarded).
    """
    if not wordner_agent:
        return []

    m_input = nlpc.wordner_input()
    m_input.lang_id = int(1)
    m_input.query = str(utf8_gbk(sentence))
    input_data = sofa.serialize(m_input)

    # Pre-bind so that five failed attempts reach the empty-result branch
    # instead of raising NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordner_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort remote call: retry on any failure.
            continue
    if not output_data:
        sys.stderr.write('The server returns None.' + '\n')
        return []

    m_output = sofa.deserialize(output_data, type(nlpc.wordner_output()))
    tags = m_output.tags
    word_ner_list = []
    for i in range(len(tags)):
        word_ner_list.append((gbk_utf8(tags[i].term), str(tags[i].type),
                              trans_id_short[tags[i].type]))
    return word_ner_list
def word_rank(sentence):
    """Query the word-rank service for ``sentence``.

    Args:
        sentence: Text to rank; converted with ``utf8_gbk`` for the request.

    Returns:
        A list of ``(word, rank, weight)`` tuples built from the response's
        ``nlpc_trunks_pn`` entries, with ``weight`` rounded to 3 decimals.
        ``[]`` when ``wordrank_agent`` is falsy or no data was obtained.
    """
    if not wordrank_agent:
        return []

    ## post request
    m_input = nlpc.wordseg_input()
    m_input.lang_id = int(0)
    m_input.lang_para = int(0)
    m_input.query = str(utf8_gbk(sentence))
    input_data = sofa.serialize(m_input)

    # Pre-bind so that five failed attempts reach the "No result" branch
    # instead of raising NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordrank_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort remote call: retry on any failure.
            continue
    if not output_data:
        sys.stderr.write('No result' + sentence + '\n')
        return []

    ## get results
    m_output = sofa.deserialize(output_data, type(nlpc.wordrank_output()))
    trunks = m_output.nlpc_trunks_pn
    rank_result_list = []
    for i in range(len(trunks)):
        word = gbk_utf8(trunks[i].buffer)
        rank_result_list.append((word, trunks[i].rank,
                                 round(trunks[i].weight, 3)))
    return rank_result_list
def word_seg(sent):
    """Segment ``sent`` with the wordseg service.

    Args:
        sent: Text to segment; converted with ``utf8_gbk`` for the request.

    Returns:
        A list of ``(offset_str, length_str, word)`` tuples, one per entry of
        the response's ``wpbtermpos`` (``wpbtermcount`` entries). Offsets and
        lengths come from ``GET_TERM_POS`` / ``GET_TERM_LEN`` and index into
        ``wpcompbuf``. ``[]`` when ``wordseg_agent`` is falsy or no data was
        obtained.
    """
    # Same guard as the sibling word_* helpers.
    if not wordseg_agent:
        return []

    m_input = nlpc.wordseg_input()
    m_input.query = str(utf8_gbk(sent))
    m_input.lang_id = int(0)
    m_input.lang_para = int(0)
    input_data = sofa.serialize(m_input)

    # Pre-bind so that five failed attempts return [] instead of raising
    # NameError on the check below.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordseg_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort remote call: retry on any failure.
            continue
    if not output_data:
        return []

    m_output = sofa.deserialize(output_data, type(nlpc.wordseg_output()))
    scw_out = m_output.scw_out

    ret_data = []
    ##seg
    for i in range(scw_out.wpbtermcount):
        posidx = GET_TERM_POS(scw_out.wpbtermpos[i])
        poslen = GET_TERM_LEN(scw_out.wpbtermpos[i])
        word = scw_out.wpcompbuf[posidx:posidx + poslen]
        ret_data.append((str(posidx), str(poslen), gbk_utf8(word)))
    return ret_data
def word_pos(sentence):
    """Query the word-POS service for ``sentence``.

    Args:
        sentence: Text to tag; converted with ``utf8_gbk`` for the request.

    Returns:
        A list of ``(word, tag)`` tuples for every response token whose
        ``get_pos_str(token.type)`` is truthy; other tokens are skipped.
        ``[]`` when ``wordpos_agent`` is falsy or no data was obtained.
    """
    if not wordpos_agent:
        return []

    ## post request
    m_input = nlpc.wordseg_input()
    m_input.lang_id = int(0)
    m_input.lang_para = int(0)
    m_input.query = str(utf8_gbk(sentence))
    input_data = sofa.serialize(m_input)

    # Pre-bind so that five failed attempts reach the "No result" branch
    # instead of raising NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordpos_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort remote call: retry on any failure.
            continue
    if not output_data:
        sys.stderr.write('No result' + sentence + '\n')
        return []

    ## get results
    m_output = sofa.deserialize(output_data, type(nlpc.wordpos_output()))
    tokens = m_output.nlpc_tokens
    segment_result = []
    for i in range(len(tokens)):
        stag = get_pos_str(tokens[i].type)
        if stag:
            segment_result.append((gbk_utf8(tokens[i].buffer), stag))
    return segment_result
Beispiel #7
0
def word_ner(sentence):
    """Return named-entity tags for ``sentence`` from the word-NER service.

    Args:
        sentence: Text to tag; converted with ``utf8_gbk`` for the request.

    Returns:
        One ``(term, type_as_str, short_label)`` tuple per response tag;
        ``short_label`` is looked up as ``trans_id_short[tag.type]``.
        ``[]`` when ``wordner_agent`` is falsy or no data was obtained.

    Raises:
        Whatever ``trans_id_short[...]`` raises for a tag type it does not
        contain (lookup is not guarded).
    """
    if not wordner_agent:
        return []

    request = nlpc.wordner_input()
    request.lang_id = int(1)
    request.query = str(utf8_gbk(sentence))
    input_data = sofa.serialize(request)

    # Without this default, exhausting all retries left ``output_data``
    # undefined and the check below raised NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordner_agent.call_method(input_data)
        except Exception:
            continue  # retry on any failure
        break
    if not output_data:
        sys.stderr.write('The server returns None.' + '\n')
        return []

    response = sofa.deserialize(output_data, type(nlpc.wordner_output()))
    tags = response.tags
    word_ner_list = []
    for idx in range(len(tags)):
        tag = tags[idx]
        word_ner_list.append(
            (gbk_utf8(tag.term), str(tag.type), trans_id_short[tag.type]))
    return word_ner_list
Beispiel #8
0
def word_rank(sentence):
    """Return ranked terms for ``sentence`` from the word-rank service.

    Args:
        sentence: Text to rank; converted with ``utf8_gbk`` for the request.

    Returns:
        One ``(word, rank, weight)`` tuple per ``nlpc_trunks_pn`` entry of
        the response; ``weight`` is rounded to 3 decimals. ``[]`` when
        ``wordrank_agent`` is falsy or no data was obtained.
    """
    if not wordrank_agent:
        return []

    ## post request
    request = nlpc.wordseg_input()
    request.lang_id = int(0)
    request.lang_para = int(0)
    request.query = str(utf8_gbk(sentence))
    input_data = sofa.serialize(request)

    # Without this default, exhausting all retries left ``output_data``
    # undefined and the check below raised NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordrank_agent.call_method(input_data)
        except Exception:
            continue  # retry on any failure
        break
    if not output_data:
        sys.stderr.write('No result' + sentence + '\n')
        return []

    ## get results
    response = sofa.deserialize(output_data, type(nlpc.wordrank_output()))
    trunks = response.nlpc_trunks_pn
    rank_result_list = []
    for idx in range(len(trunks)):
        trunk = trunks[idx]
        word = gbk_utf8(trunk.buffer)
        rank = trunk.rank
        wght = round(trunk.weight, 3)
        rank_result_list.append((word, rank, wght))
    return rank_result_list
Beispiel #9
0
def word_pos(sentence):
    """Return part-of-speech tagged words for ``sentence``.

    Args:
        sentence: Text to tag; converted with ``utf8_gbk`` for the request.

    Returns:
        One ``(word, tag)`` tuple per response token for which
        ``get_pos_str(token.type)`` is truthy. ``[]`` when ``wordpos_agent``
        is falsy or no data was obtained.
    """
    if not wordpos_agent:
        return []

    ## post request
    request = nlpc.wordseg_input()
    request.lang_id = int(0)
    request.lang_para = int(0)
    request.query = str(utf8_gbk(sentence))
    input_data = sofa.serialize(request)

    # Without this default, exhausting all retries left ``output_data``
    # undefined and the check below raised NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordpos_agent.call_method(input_data)
        except Exception:
            continue  # retry on any failure
        break
    if not output_data:
        sys.stderr.write('No result' + sentence + '\n')
        return []

    ## get results
    response = sofa.deserialize(output_data, type(nlpc.wordpos_output()))
    tokens = response.nlpc_tokens
    segment_result = []
    for idx in range(len(tokens)):
        stag = get_pos_str(tokens[idx].type)
        if not stag:
            continue
        word = gbk_utf8(tokens[idx].buffer)
        segment_result.append((word, stag))
    return segment_result
Beispiel #10
0
def word_seg(sent):
    """Return the word-phrase segmentation of ``sent``.

    Args:
        sent: Text to segment; converted with ``utf8_gbk`` for the request.

    Returns:
        One ``(offset_str, length_str, word)`` tuple for each of the
        ``wpbtermcount`` entries in the response's ``wpbtermpos``; the word
        is the matching slice of ``wpcompbuf`` passed through ``gbk_utf8``.
        ``[]`` when ``wordseg_agent`` is falsy or no data was obtained.
    """
    # Same guard as the sibling word_* helpers.
    if not wordseg_agent:
        return []

    request = nlpc.wordseg_input()
    request.query = str(utf8_gbk(sent))
    request.lang_id = int(0)
    request.lang_para = int(0)
    input_data = sofa.serialize(request)

    # Without this default, exhausting all retries left ``output_data``
    # undefined and the check below raised NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = wordseg_agent.call_method(input_data)
        except Exception:
            continue  # retry on any failure
        break
    if not output_data:
        return []

    response = sofa.deserialize(output_data, type(nlpc.wordseg_output()))
    scw_out = response.scw_out

    ret_data = []
    ##seg
    for idx in range(scw_out.wpbtermcount):
        packed = scw_out.wpbtermpos[idx]
        posidx = GET_TERM_POS(packed)
        poslen = GET_TERM_LEN(packed)
        word = scw_out.wpcompbuf[posidx:posidx + poslen]
        ret_data.append((str(posidx), str(poslen), gbk_utf8(word)))
    return ret_data
Beispiel #11
0
def word_lmscore(sentence):
    """Query the language-model scoring service for ``sentence``.

    Args:
        sentence: Text to score; converted with ``utf8_gbk`` for the request.

    Returns:
        ``result.prob`` from the deserialized response, or ``0.0`` when
        ``lmscore_agent`` is falsy or no data was obtained.
    """
    if not lmscore_agent:
        return 0.0

    m_input = nlpc.lmscore_input()
    m_input.query = str(utf8_gbk(sentence))
    m_input.debug_flag = True
    input_data = sofa.serialize(m_input)

    # Pre-bind so that five failed attempts return 0.0 instead of raising
    # NameError on the check below.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = lmscore_agent.call_method(input_data)
            break
        except Exception:
            # Best-effort remote call: retry on any failure.
            continue
    if not output_data:
        return 0.0

    m_output = sofa.deserialize(output_data, type(nlpc.lmscore_output()))
    return m_output.result.prob
Beispiel #12
0
	def _sim(self,q1,q2):
		"""Ask the textsim service to compare ``q1`` with ``q2``.

		The call is attempted up to five times. Returns the ``textsim``
		field of the deserialized response, or ``None`` when every attempt
		failed or the response payload is empty.
		"""
		request = textsim.textsim_input()
		request.query1 = q1
		request.query2 = q2
		input_data = sofa.serialize(request)
		attempts_left = 5
		while attempts_left > 0:
			attempts_left -= 1
			try:
				ret, output_data = self.textsim_agent.call_method(input_data)
			except Exception:
				# A failed attempt counts as an empty payload.
				output_data = ''
			else:
				break
		if len(output_data) != 0:
			template = textsim.textsim_output()
			reply = sofa.deserialize(output_data, type(template))
			return reply.textsim
		return None
Beispiel #13
0
 def _sim(self, q1, q2):
     """Return the textsim service's result for the pair ``q1`` / ``q2``.

     Up to five calls are made; the first one that does not raise wins.
     ``None`` is returned when all attempts raised or the payload is empty.
     """
     query = textsim.textsim_input()
     query.query1 = q1
     query.query2 = q2
     input_data = sofa.serialize(query)
     for _attempt in range(5):
         try:
             ret, output_data = self.textsim_agent.call_method(input_data)
         except Exception:
             # Treat a failed attempt as "no data" and try again.
             output_data = ''
             continue
         break
     if len(output_data) == 0:
         return None
     result = sofa.deserialize(output_data, type(textsim.textsim_output()))
     return result.textsim
Beispiel #14
0
def word_lmscore(sentence):
    """Return the language-model probability reported for ``sentence``.

    Args:
        sentence: Text to score; converted with ``utf8_gbk`` for the request.

    Returns:
        The response's ``result.prob``; ``0.0`` when ``lmscore_agent`` is
        falsy or no data was obtained.
    """
    if not lmscore_agent:
        return 0.0

    request = nlpc.lmscore_input()
    request.query = str(utf8_gbk(sentence))
    request.debug_flag = True
    input_data = sofa.serialize(request)

    # Without this default, exhausting all retries left ``output_data``
    # undefined and the check below raised NameError.
    output_data = ''
    for _ in range(5):
        try:
            _ret, output_data = lmscore_agent.call_method(input_data)
        except Exception:
            continue  # retry on any failure
        break
    if not output_data:
        return 0.0

    response = sofa.deserialize(output_data, type(nlpc.lmscore_output()))
    return response.result.prob
Beispiel #15
0
def main():
    """Read sentences from stdin and print their dependency parses.

    Lines are stripped and collected into batches of 1000; each batch is
    sent to the ``nlpc_depparser_uni_web_107`` service (up to five
    attempts). Every returned term is written to stdout as one
    tab-separated line (index, word, lemma, cpostag, postag, ner, feat,
    head, deprel) with blank fields shown as ``'_'``, and each sentence is
    followed by an empty line. A batch that gets no response prints
    ``No result`` and is kept, so it is resent together with the next line.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'nlpc')

    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')

    # Alternative endpoint: 'sofa.service.nlpc_depparser_uni_query_107'
    query_agent = S.ClientAgent(
        conf['sofa.service.nlpc_depparser_uni_web_107'])

    batch_size = 1000
    max_attempts = 5

    def or_placeholder(value):
        # str.strip() never returns None, so the former "is None" test could
        # not fire; test for blankness instead.
        return value if value.strip() else '_'

    def parse_and_print(sentences):
        """Send one batch and print it; return False when no data came back."""
        m_input = nlpc.depparser_uni_input()
        m_input.grain_size = 1
        m_input.sentence_segmented = False
        m_input.sentences = sentences
        input_data = sofa.serialize(m_input)

        # Reset per batch: otherwise a fully failed call raises NameError on
        # the first batch or silently reuses the previous batch's payload.
        output_data = ''
        for _ in range(max_attempts):
            try:
                _ret, output_data = query_agent.call_method(input_data)
                break
            except Exception:
                # Best-effort remote call: retry on any failure.
                continue
        if not output_data:
            sys.stdout.write('No result' + '\n')
            return False

        m_output = sofa.deserialize(output_data,
                                    type(nlpc.depparser_uni_output()))
        dep_sentences = m_output.dep_sentences
        for i in range(len(dep_sentences)):
            dep_terms = dep_sentences[i].dep_terms
            for j in range(len(dep_terms)):
                term = dep_terms[j]
                fields = [
                    str(j),
                    term.word,
                    or_placeholder(term.lemma),
                    or_placeholder(term.cpostag),
                    or_placeholder(term.postag),
                    or_placeholder(term.ner),
                    or_placeholder(term.feat),
                    str(term.head),
                    or_placeholder(term.deprel),
                ]
                sys.stdout.write('\t'.join(fields) + '\n')
            sys.stdout.write('\n')
        return True

    in_sentences = []
    while True:
        line = sys.stdin.readline()
        if len(line) <= 0:
            break
        in_sentences.append(str(line.strip(' \t\n\r')))
        if len(in_sentences) < batch_size:
            continue
        if parse_and_print(in_sentences):
            in_sentences = []

    # Flush whatever is left once stdin is exhausted.
    if len(in_sentences) > 0:
        parse_and_print(in_sentences)
Beispiel #16
0
def main():
    """Segment each stdin line with the wordseg service and print the result.

    With no command-line argument every result section is printed; with
    exactly one argument (``basic``, ``segment``, ``phrase``, ``new``,
    ``human`` or ``book``) only that section is printed. Reading stops at
    end of input. A line for which the service yields no data prints
    ``No result``.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'wordseg')

    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')

    wordseg_agent = S.ClientAgent(conf['sofa.service.nlpc_wordseg_3016'])

    # (mode, banner, holder attribute or None for the top-level output,
    #  count attribute, positions attribute, buffer attribute)
    sections = (
        ('basic', '=========== Basic Word Sep Result =============',
         None, 'wsbtermcount', 'wsbtermpos', 'wordsepbuf'),
        ('segment', '============  Word Phrase Result  ==============',
         None, 'wpbtermcount', 'wpbtermpos', 'wpcompbuf'),
        ('phrase', '============  Sub Phrase Result   ==============',
         None, 'spbtermcount', 'spbtermpos', 'subphrbuf'),
        ('new', '============  New Word  Result  ==============',
         'pnewword', 'newwordbtermcount', 'newwordbtermpos', 'newwordbuf'),
        ('human', '===========  Human Name Result   ==============',
         None, 'namebtermcount', 'namebtermpos', 'namebuf'),
        ('book', '===============  book names   =================',
         None, 'bnbtermcount', 'bnbtermpos', 'booknamebuf'),
    )

    show_all = len(argv) == 1
    selected = argv[1] if len(argv) == 2 else None

    while True:
        line = stdin.readline()
        if len(line) <= 0:
            return
        # NOTE(review): str.decode() only exists on Python 2 byte strings.
        line = line.decode('utf-8').encode('gbk')

        m_input = wordseg.wordseg_input()
        m_input.query = str(line)
        m_input.lang_id = int(0)
        m_input.lang_para = int(0)
        input_data = sofa.serialize(m_input)

        # Reset per line: otherwise a fully failed call raises NameError on
        # the first line or silently reuses the previous line's payload.
        output_data = ''
        for _ in range(5):
            try:
                _ret, output_data = wordseg_agent.call_method(input_data)
                break
            except Exception:
                # Best-effort remote call: retry on any failure.
                continue
        if not output_data:
            stdout.write('No result' + '\n')
            continue

        m_output = sofa.deserialize(output_data,
                                    type(wordseg.wordseg_output()))
        m_output = m_output.scw_out

        for mode, banner, holder_attr, count_attr, pos_attr, buf_attr in sections:
            if not (show_all or selected == mode):
                continue
            stdout.write(banner + '\n')
            if holder_attr is None:
                holder = m_output
            else:
                holder = getattr(m_output, holder_attr)
            positions = getattr(holder, pos_attr)
            buf = getattr(holder, buf_attr)
            for i in range(getattr(holder, count_attr)):
                posidx = GET_TERM_POS(positions[i])
                poslen = GET_TERM_LEN(positions[i])
                stdout.write('%s ' % buf[posidx:posidx + poslen])
            stdout.write('\n')
def main():
    """Batch-parse stdin sentences and print one term per line.

    Stripped input lines are grouped into batches of 1000 and sent to the
    ``nlpc_depparser_uni_web_107`` service, with up to five attempts per
    batch. Each returned term is printed to stdout as tab-separated
    ``index, word, lemma, cpostag, postag, ner, feat, head, deprel`` (blank
    fields become ``'_'``); an empty line follows every sentence. When a
    batch yields no data, ``No result`` is printed and the batch is kept, so
    it is resent together with the next input line.
    """
    sofa.use('drpc.ver_1_0_0', 'S')
    sofa.use('nlpc.ver_1_0_0', 'nlpc')

    conf = sofa.Config()
    conf.load('./config/drpc_client.xml')

    # Alternative endpoint: 'sofa.service.nlpc_depparser_uni_query_107'
    query_agent = S.ClientAgent(conf['sofa.service.nlpc_depparser_uni_web_107'])

    batch_size = 1000
    max_attempts = 5
    # Text columns that are replaced by '_' when blank.
    placeholder_attrs = ('lemma', 'cpostag', 'postag', 'ner', 'feat', 'deprel')

    def call_service(sentences):
        """Return the raw response for ``sentences`` ('' if every try fails)."""
        m_input = nlpc.depparser_uni_input()
        m_input.grain_size = 1
        m_input.sentence_segmented = False
        m_input.sentences = sentences
        input_data = sofa.serialize(m_input)

        # Bound up front: otherwise a fully failed call raises NameError or
        # silently reuses the payload of the previous batch.
        output_data = ''
        for _ in range(max_attempts):
            try:
                _ret, output_data = query_agent.call_method(input_data)
                break
            except Exception:
                # Best-effort remote call: retry on any failure.
                continue
        return output_data

    def print_parse(output_data):
        """Deserialize ``output_data`` and write every term to stdout."""
        m_output = sofa.deserialize(output_data,
                                    type(nlpc.depparser_uni_output()))
        dep_sentences = m_output.dep_sentences
        for i in range(len(dep_sentences)):
            dep_terms = dep_sentences[i].dep_terms
            for j in range(len(dep_terms)):
                term = dep_terms[j]
                # str.strip() never returns None, so the former "is None"
                # test could not fire; substitute on blank values instead.
                for attr in placeholder_attrs:
                    if not getattr(term, attr).strip():
                        setattr(term, attr, '_')
                sys.stdout.write(
                    str(j) + '\t' + term.word + '\t' + term.lemma + '\t' +
                    term.cpostag + '\t' + term.postag + '\t' + term.ner +
                    '\t' + term.feat + '\t' + str(term.head) + '\t' +
                    term.deprel + '\n')
            sys.stdout.write('\n')

    def process(sentences):
        """Parse and print one batch; return False when no data came back."""
        output_data = call_service(sentences)
        if not output_data:
            sys.stdout.write('No result' + '\n')
            return False
        print_parse(output_data)
        return True

    in_sentences = []
    while True:
        line = sys.stdin.readline()
        if len(line) <= 0:
            break
        line = line.strip(' \t\n\r')
        in_sentences.append(str(line))
        if len(in_sentences) >= batch_size and process(in_sentences):
            in_sentences = []

    # Flush the final partial batch; a failure here just ends the run.
    if len(in_sentences) > 0:
        process(in_sentences)