예제 #1
0
 def __init__(self, address_morph, address_syntax, address_rst):
     """Assemble the four-stage analysis pipeline and store it in ``self._ppl``.

     Every ``address_*`` argument is indexed positionally: element 0 and
     element 1 are handed to ``ProcessorRemote`` as its first two
     arguments (presumably host and port -- confirm against callers).

     Args:
         address_morph: Address of the remote processor queried with
             pipeline name 'default' on the raw text.
         address_syntax: Address of the remote processor queried with
             pipeline name '0' on tokens and sentences.
         address_rst: Address of the remote processor queried with
             pipeline name 'default' that yields the 'rst' annotation.
     """
     # Each stage is a (processor, input annotation names, output mapping)
     # triple; the stages are created in the order they appear in the
     # pipeline.

     # Stage 1: raw text -> sentences, tokens, postag, lemma.
     morph_stage = (
         ProcessorRemote(address_morph[0], address_morph[1], 'default'),
         ['text'],
         {
             'sentences': 'sentences',
             'tokens': 'tokens',
             'postag': 'postag',
             'lemma': 'lemma',
         },
     )

     # Stage 2: the converter reads 'postag' and writes 'morph' plus a
     # 'postag' entry under the same name it was read from.
     converter_stage = (
         ConverterMystemToUd(),
         ['postag'],
         {'morph': 'morph', 'postag': 'postag'},
     )

     # Stage 3: the syntax processor's own 'postag' output is stored as
     # 'ud_postag', so it does not reuse the 'postag' name.
     syntax_stage = (
         ProcessorRemote(address_syntax[0], address_syntax[1], '0'),
         ['tokens', 'sentences'],
         {'syntax_dep_tree': 'syntax_dep_tree', 'postag': 'ud_postag'},
     )

     # Stage 4: the final processor receives every annotation listed here
     # and contributes 'rst'.
     rst_inputs = [
         'text', 'tokens', 'sentences', 'lemma', 'morph', 'postag',
         'syntax_dep_tree',
     ]
     rst_stage = (
         ProcessorRemote(address_rst[0], address_rst[1], 'default'),
         rst_inputs,
         {'rst': 'rst'},
     )

     self._ppl = PipelineCommon(
         [morph_stage, converter_stage, syntax_stage, rst_stage])
     self._name = 'default'
예제 #2
0
 def __init__(self, basic_processor=('vmh1.isa.ru', 3344), udpipe_processor=('vmh1.isa.ru', 3355)):
     """Create the document pipeline and store it in ``self.ppl``.

     Both arguments are indexed positionally: element 0 and element 1 are
     passed to ``ProcessorRemote`` as its first two arguments (presumably
     host and port -- confirm against callers).

     Args:
         basic_processor: Address of the remote processor queried with
             pipeline name 'default' on the raw text.
         udpipe_processor: Address of the remote processor queried with
             pipeline name '0' on tokens and sentences.
     """
     # Stage 1: raw text -> sentences, tokens, lemma; its 'postag' output
     # is kept under 'mystem_postags' for the converter stage below.
     basic_stage = (
         ProcessorRemote(basic_processor[0], basic_processor[1], 'default'),
         ['text'],
         {
             'sentences': 'sentences',
             'tokens': 'tokens',
             'postag': 'mystem_postags',
             'lemma': 'lemma',
         },
     )

     # Stage 2: dependency trees from tokens and sentences.
     syntax_stage = (
         ProcessorRemote(udpipe_processor[0], udpipe_processor[1], '0'),
         ['tokens', 'sentences'],
         {'syntax_dep_tree': 'syntax_dep_tree'},
     )

     # Stage 3: only the converter's 'morph' output is kept, and it is
     # stored under the name 'postag'.
     # NOTE(review): mapping 'morph' -> 'postag' looks unusual; confirm it
     # is intended before changing it.
     converter_stage = (
         ConverterMystemToUd(),
         ['mystem_postags'],
         {'morph': 'postag'},
     )

     inner_pipeline = PipelineCommon(
         [basic_stage, syntax_stage, converter_stage])
     self.ppl = WrapperMultiProcessDocument([inner_pipeline])
예제 #3
0
def get_tree(text):
	"""Analyse *text* and return the root vertex of each sentence's dependency tree.

	The text is sent through a morphology processor (localhost:3333), a
	syntax processor (localhost:3334) and a Mystem-to-UD tag converter.
	One ``tree(word(...))`` vertex is built per lemma, vertices are linked
	according to ``syntax_dep_tree``, and every vertex whose parent is -1 is
	collected as a root.

	Args:
		text: Raw text to analyse.

	Returns:
		A list of root vertices. Each root has its ``sentence`` attribute set
		to the list of token texts of its sentence. ``None`` is returned when
		the pipeline call raises.
	"""
	from isanlp import PipelineCommon
	from isanlp.processor_remote import ProcessorRemote
	from isanlp.ru.converter_mystem_to_ud import ConverterMystemToUd
	from Parser.some_reparser import extract_semantic_relations
	HOST = 'localhost'
	proc_morph = ProcessorRemote(HOST, 3333, 'default')
	proc_syntax = ProcessorRemote(HOST, 3334, '0')

	syntax_ppl = PipelineCommon([
		(proc_morph,
			['text'],
			{'tokens' : 'tokens', 'sentences' : 'sentences', 'postag' : 'postag', 'lemma' : 'lemma'}),
		(proc_syntax,
			['tokens','sentences'],
			{'syntax_dep_tree' : 'syntax_dep_tree'}),
		(ConverterMystemToUd(),
			['postag'],
			{'postag' : 'postag', 'morph' : 'morph'})
		])
	try:
		analysis_res = syntax_ppl(text)
	except Exception:
		# Best-effort contract kept for callers: a failed analysis yields None.
		# Only Exception is caught so that KeyboardInterrupt / SystemExit
		# still propagate instead of being silently swallowed.
		return None

	tokens = analysis_res['tokens']
	sentence_spans = analysis_res['sentences']

	# Token texts per sentence; each span addresses the flat token list.
	sentences = [
		[tokens[k].text for k in range(span.begin, span.end)]
		for span in sentence_spans
	]

	relations = extract_semantic_relations(text)

	vertices_list_list = []
	for sent_idx, sent_lemmas in enumerate(analysis_res['lemma']):
		# 'lemma' is nested per sentence while 'tokens' is flat, so the
		# in-sentence index must be shifted by the sentence's first token.
		# Indexing tokens with the bare in-sentence index would give every
		# sentence after the first the offsets of the first sentence's tokens.
		# Assumes 'lemma' and 'sentences' are aligned one-to-one.
		first_token = sentence_spans[sent_idx].begin
		vertices_list = []
		for i, lemma in enumerate(sent_lemmas):
			token = tokens[first_token + i]
			start, end = token.begin, token.end
			# Roles of every relation whose child covers exactly this token.
			role_vert = [
				rel['tp'] for rel in relations
				if rel['child']['start'] == start and rel['child']['end'] == end
			]
			vert = tree(word(lemma,
					analysis_res['postag'][sent_idx][i],
					analysis_res['morph'][sent_idx][i],
					start, end,
					i,
					role = role_vert))
			vertices_list.append(vert)
		vertices_list_list.append(vertices_list)

	root_list = []
	for sent_idx, vertices in enumerate(vertices_list_list):
		for i, dep in enumerate(analysis_res['syntax_dep_tree'][sent_idx]):
			if dep.parent != -1:
				vertices[dep.parent].add_child(vertices[i], dep.link_name)
			else:
				# parent == -1 marks a root; attach the sentence's words to it.
				vertices[i].sentence = sentences[sent_idx]
				root_list.append(vertices[i])
	return root_list
예제 #4
0
 def __init__(
     self,
     udpipe=("tsa05.isa.ru", 3334),
     rst=("papertext.ru", 5555),
     cache_path="./rst-cache.pkl",
 ):
     """Build the clause-extraction pipeline and load an existing cache.

     Args:
         udpipe: Two-element address unpacked into the first two arguments
             of the ``ProcessorRemote`` queried with pipeline name "0".
         rst: Two-element address unpacked into the first two arguments
             of the ``ProcessorRemote`` queried with pipeline name
             "default".
         cache_path: File the cache is loaded from when it already exists.
     """
     udpipe_host, udpipe_port = udpipe
     rst_host, rst_port = rst
     self.cache_path = cache_path

     # Stage 1: raw text -> sentences, tokens, lemma, dependency tree; the
     # processor's 'postag' output is stored as 'ud_postag'.
     udpipe_stage = (
         ProcessorRemote(udpipe_host, udpipe_port, "0"),
         ["text"],
         {
             "sentences": "sentences",
             "tokens": "tokens",
             "lemma": "lemma",
             "syntax_dep_tree": "syntax_dep_tree",
             "postag": "ud_postag",
         },
     )

     # Stage 2: local Mystem processor, initialised immediately
     # (delay_init=False), writing 'postag'.
     mystem_stage = (
         ProcessorMystem(delay_init=False),
         ["tokens", "sentences"],
         {"postag": "postag"},
     )

     # Stage 3: the converter reads 'postag' and writes 'morph' plus a
     # 'postag' entry under the same name.
     converter_stage = (
         ConverterMystemToUd(),
         ["postag"],
         {"morph": "morph", "postag": "postag"},
     )

     # Stage 4: the remote processor receives all annotations listed here
     # and contributes 'clauses'.
     rst_inputs = [
         "text",
         "tokens",
         "sentences",
         "postag",
         "morph",
         "lemma",
         "syntax_dep_tree",
     ]
     rst_stage = (
         ProcessorRemote(rst_host, rst_port, "default"),
         rst_inputs,
         {"clauses": "clauses"},
     )

     self.ppl = PipelineCommon(
         [udpipe_stage, mystem_stage, converter_stage, rst_stage]
     )

     # Start with an empty cache and replace it only if a cache file is
     # already present at cache_path.
     self.__cache = {}
     self.__hasher = city_32()
     # NOTE(review): jb.load presumably unpickles the file; only point
     # cache_path at trusted files -- confirm.
     if os.path.exists(self.cache_path):
         self.__cache = jb.load(self.cache_path)
예제 #5
0
def get_tree(text):
    """Analyse *text* and return the root vertex of each sentence's dependency tree.

    The text goes through a morphology processor (localhost:3333), a syntax
    processor (localhost:3334) and a Mystem-to-UD tag converter. One
    ``tree(word(...))`` vertex is created per lemma, vertices are linked
    following ``syntax_dep_tree``, and every vertex whose parent is -1 is
    returned as a root.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of root vertices; each has its ``sentence`` attribute set to
        the list of token texts of the sentence it belongs to.
    """
    host = 'localhost'
    morph_client = ProcessorRemote(host, 3333, 'default')
    syntax_client = ProcessorRemote(host, 3334, '0')

    pipeline = PipelineCommon([
        (
            morph_client,
            ['text'],
            {
                'tokens': 'tokens',
                'sentences': 'sentences',
                'postag': 'postag',
                'lemma': 'lemma',
            },
        ),
        (
            syntax_client,
            ['tokens', 'sentences'],
            {'syntax_dep_tree': 'syntax_dep_tree'},
        ),
        (
            ConverterMystemToUd(),
            ['postag'],
            {'postag': 'postag', 'morph': 'morph'},
        ),
    ])
    result = pipeline(text)

    # Token texts grouped by sentence; each span addresses the flat token
    # list through its begin/end indices.
    sentence_words = [
        [result['tokens'][pos].text for pos in range(span.begin, span.end)]
        for span in result['sentences']
    ]

    # One vertex per lemma, grouped by sentence. The last argument of
    # word() is the position of the word inside its sentence.
    nodes_by_sentence = []
    for sent_no, sent_lemmas in enumerate(result['lemma']):
        nodes = [
            tree(
                word(lemma, result['postag'][sent_no][pos],
                     result['morph'][sent_no][pos], pos))
            for pos, lemma in enumerate(sent_lemmas)
        ]
        nodes_by_sentence.append(nodes)

    # Link children to parents; a parent of -1 marks a root.
    roots = []
    for sent_no, nodes in enumerate(nodes_by_sentence):
        for pos, arc in enumerate(result['syntax_dep_tree'][sent_no]):
            if arc.parent == -1:
                nodes[pos].sentence = sentence_words[sent_no]
                roots.append(nodes[pos])
                continue
            nodes[arc.parent].add_child(nodes[pos], arc.link_name)
    return roots
예제 #6
0
import os

from isanlp import PipelineCommon
from isanlp.processor_remote import ProcessorRemote

# Script: run a two-step remote pipeline over the file named by TEST_EN_PATH.
host = 'localhost'

# All three environment variables are required: a missing one raises
# KeyError, a non-numeric port raises ValueError.
port_morph = int(os.environ['TEST_MORPH_PORT'])
port_srl = int(os.environ['TEST_SRL_PORT'])
text_path = os.environ['TEST_EN_PATH']

with open(text_path, encoding='utf8') as f:
    text = f.read()

# Step 1: raw text -> tokens, sentences, lemma, postag.
morph_step = (
    ProcessorRemote(host=host, port=port_morph, pipeline_name='default'),
    ['text'],
    {
        'tokens': 'tokens',
        'sentences': 'sentences',
        'lemma': 'lemma',
        'postag': 'postag',
    },
)

# Step 2: tokens and sentences -> srl.
srl_step = (
    ProcessorRemote(host=host, port=port_srl, pipeline_name='default'),
    ['tokens', 'sentences'],
    {'srl': 'srl'},
)

ppl = PipelineCommon([morph_step, srl_step])

annotations = ppl(text)
예제 #7
0
import os
from isanlp.processor_remote import ProcessorRemote
from isanlp.processor_syntaxnet_remote import ProcessorSyntaxNetRemote
from isanlp import PipelineCommon
from isanlp.ru.converter_mystem_to_ud import ConverterMystemToUd

# Ports and the input path come from required environment variables:
# a missing variable raises KeyError, a non-numeric port raises ValueError.
port_morph = int(os.environ['TEST_MORPH_PORT'])
port_syntax = int(os.environ['TEST_SYNTAX_PORT'])
port_srl = int(os.environ['TEST_SRL_PORT'])
text_path = os.environ['TEST_PATH']

# Read the whole input file as UTF-8 text.
with open(text_path, encoding='utf8') as f:
    text = f.read()

ppl = PipelineCommon([(ProcessorRemote(host='localhost',
                                       port=port_morph,
                                       pipeline_name='default'), ['text'], {
                                           'tokens': 'tokens',
                                           'sentences': 'sentences',
                                           'lemma': 'lemma',
                                           'postag': 'mystem_postag'
                                       }),
                      (ConverterMystemToUd(), ['mystem_postag'], {
                          'morph': 'morph',
                          'postag': 'postag'
                      }),
                      (ProcessorSyntaxNetRemote(host='localhost',
                                                port=port_syntax),
                       ['tokens', 'sentences'], {
                           'syntax_dep_tree': 'syntax_dep_tree'
                       }),
예제 #8
0
import os
from isanlp.processor_remote import ProcessorRemote

# Port and input path come from required environment variables:
# a missing variable raises KeyError, a non-numeric port raises ValueError.
port = int(os.environ['TEST_PORT'])
text_path = os.environ['TEST_PATH']

# Read the whole input file as UTF-8 text.
with open(text_path, encoding='utf8') as f:
    text = f.read()

# Call the remote processor's 'default' pipeline on localhost directly with
# the raw text, without wrapping it in a PipelineCommon.
proc = ProcessorRemote(host='localhost', port=port, pipeline_name='default')
annotations = proc(text)