def read_text(self, text, aux_text=None, reset_context=True):
    """Parse ``text`` and read every sentence the parser returns.

    The parser and the disambiguation helper are created on first use and
    kept on the instance for later calls.

    Args:
        text: text passed to ``self.parser.parse_text``.
        aux_text: optional extra text; when truthy it is appended to
            ``text`` (separated by a newline) in the stored context.
        reset_context: when true, ``self.aux_text`` is overwritten with
            the new context before the sentences are read.

    Returns:
        A list of ``(item[0], output)`` tuples, one per item returned by
        the parser, where ``output`` comes from ``self.read_sentence``.
    """
    if self.parser is None:
        self.debug_msg('creating parser...')
        self.parser = Parser()
        self.disamb = Disambiguation(self.hg, self.parser)

    nlp_parses = self.parser.parse_text(text)

    # NOTE(review): the aux_text handling is assumed to sit inside the
    # reset_context branch -- the source formatting does not show it.
    if reset_context:
        self.aux_text = '%s\n%s' % (text, aux_text) if aux_text else text

    parses = []
    for nlp_parse in nlp_parses:
        label = nlp_parse[0]
        parses.append((label, self.read_sentence(Sentence(nlp_parse[1]))))

    # Debug output is emitted only after every sentence has been read.
    for _, sentence_output in parses:
        self.debug_msg('== extra ==')
        for edge in sentence_output.edges:
            self.debug_msg(ed.edge2str(edge))

    return parses
def read_text(self, text):
    """Parse ``text`` and read each sentence the parser returns.

    The parser is created on first use and kept on the instance.

    Args:
        text: text passed to ``self.parser.parse_text``.

    Returns:
        A list with the result of ``self.read_sentence`` for every item
        returned by the parser, in parser order.
    """
    if self.parser is None:
        self.debug_msg('creating parser...')
        self.parser = Parser()

    results = []
    for raw_sentence in self.parser.parse_text(text):
        results.append(self.read_sentence(Sentence(raw_sentence)))
    return results
class Extractor(object):
    """Run text through a configurable pipeline of named stages.

    Every sentence returned by the parser goes through the stages listed
    in ``stages``, in order: the first stage receives the sentence, each
    later stage is built from the previous stage's output.
    """

    def __init__(self, hg,
                 stages=('alpha-forest', 'beta-naive', 'gamma', 'delta',
                         'epsilon'),
                 show_namespaces=False):
        """Store the configuration; the parser is created lazily.

        Args:
            hg: object handed to the disambiguation helper and to the
                'beta' stage.
            stages: ordered stage names understood by ``create_stage``.
            show_namespaces: forwarded as ``with_namespaces`` when stage
                results are rendered as strings.
        """
        self.hg = hg
        self.stages = stages
        # Both are created on the first call to read_text().
        self.parser = None
        self.disamb = None
        self.debug = False
        # String form of each stage's result for the last sentence read.
        self.outputs = []
        self.aux_text = ''
        self.show_namespaces = show_namespaces

    def create_stage(self, name, output):
        """Build the stage registered under ``name``.

        Args:
            name: stage name.
            output: result of the previous stage (``None`` for the first).

        Raises:
            RuntimeError: if ``name`` is not a known stage name.
        """
        if name == 'alpha-forest':
            return AlphaForest()
        elif name == 'beta':
            return BetaStage(self.hg, self.parser, self.disamb, output,
                             self.aux_text)
        elif name == 'beta-simple':
            return BetaStageSimple(output)
        elif name == 'beta-naive':
            return BetaStageNaive(output)
        elif name == 'gamma':
            return GammaStage(output)
        elif name == 'delta':
            return DeltaStage(output)
        elif name == 'epsilon':
            return EpsilonStage(output)
        raise RuntimeError('unknown stage name: %s' % name)

    def debug_msg(self, msg):
        """Log ``msg`` at INFO level and also print it in debug mode."""
        logging.info(msg)
        if self.debug:
            print(msg)

    def read_text(self, text, aux_text=None, reset_context=True):
        """Parse ``text`` and read every sentence the parser returns.

        Args:
            text: text passed to ``self.parser.parse_text``.
            aux_text: optional extra text appended (after a newline) to
                ``text`` in the stored context.
            reset_context: when true, ``self.aux_text`` is overwritten
                before the sentences are read.

        Returns:
            A list of ``(p[0], output)`` tuples, one per item returned by
            the parser, where ``output`` comes from ``read_sentence``.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
            self.disamb = Disambiguation(self.hg, self.parser)
        nlp_parses = self.parser.parse_text(text)

        # NOTE(review): the aux_text branch is assumed to be nested under
        # reset_context -- the source formatting does not show it.
        if reset_context:
            self.aux_text = text
            if aux_text:
                self.aux_text = '%s\n%s' % (text, aux_text)

        parses = [(p[0], self.read_sentence(Sentence(p[1])))
                  for p in nlp_parses]

        for p in parses:
            self.debug_msg('== extra ==')
            for edg in p[1].edges:
                self.debug_msg(ed.edge2str(edg))

        return parses

    def read_sentence(self, sentence):
        """Run ``sentence`` through every configured stage.

        The string form of each stage's result is appended to
        ``self.outputs``, which is reset at the start of every call.

        Returns:
            The last stage's output, with ``main_edge`` set from its tree.

        Raises:
            RuntimeError: if ``self.stages`` is empty, or (from
                ``create_stage``) if a stage name is unknown.
        """
        self.debug_msg('parsing sentence: %s' % sentence)
        if self.debug:
            sentence.print_tree()

        self.outputs = []

        last_stage_output = None
        for position, name in enumerate(self.stages):
            stage = self.create_stage(name, last_stage_output)
            self.debug_msg('executing %s stage...' % name)
            # Only the first stage consumes the raw sentence.
            if position == 0:
                last_stage_output = stage.process_sentence(sentence)
            else:
                last_stage_output = stage.process()
            output = last_stage_output.tree.to_hyperedge_str(
                with_namespaces=self.show_namespaces)
            self.outputs.append(output)
            self.debug_msg(output)

        # Without this guard an empty pipeline fails below with an opaque
        # AttributeError on None.
        if last_stage_output is None:
            raise RuntimeError('no stages configured')

        last_stage_output.main_edge = last_stage_output.tree.to_hyperedge()
        return last_stage_output
class Extractor(object):
    """Read sentences through five fixed stages, alpha to epsilon.

    Each stage has a configurable type; only ``'default'`` is recognised.
    The string form of every stage's result for the last sentence read is
    kept in ``alpha_output`` ... ``epsilon_output``.
    """

    def __init__(self, hg, alpha='default', beta='default',
                 gamma='default', delta='default', epsilon='default'):
        """Store the stage types; the parser is created lazily.

        Args:
            hg: object handed to the beta stage.
            alpha, beta, gamma, delta, epsilon: stage type names.
        """
        self.hg = hg
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.delta = delta
        self.epsilon = epsilon
        # Created on the first call to read_text().
        self.parser = None
        self.debug = False
        self.alpha_output = None
        self.beta_output = None
        self.gamma_output = None
        self.delta_output = None
        self.epsilon_output = None

    def debug_msg(self, msg):
        """Print ``msg`` when debug mode is on."""
        if self.debug:
            print(msg)

    def create_alpha_stage(self):
        """Build the alpha stage; raise RuntimeError for unknown types."""
        if self.alpha != 'default':
            raise RuntimeError(
                'unknown alpha stage type: %s' % self.alpha)
        return AlphaStage()

    def create_beta_stage(self, tree):
        """Build the beta stage for ``tree``; raise RuntimeError for
        unknown types."""
        if self.beta != 'default':
            raise RuntimeError('unknown beta stage type: %s' % self.beta)
        return BetaStage(self.hg, tree)

    def create_gamma_stage(self, tree):
        """Build the gamma stage for ``tree``; raise RuntimeError for
        unknown types."""
        if self.gamma != 'default':
            raise RuntimeError(
                'unknown gamma stage type: %s' % self.gamma)
        return GammaStage(tree)

    def create_delta_stage(self, tree):
        """Build the delta stage for ``tree``; raise RuntimeError for
        unknown types."""
        if self.delta != 'default':
            raise RuntimeError(
                'unknown delta stage type: %s' % self.delta)
        return DeltaStage(tree)

    def create_epsilon_stage(self, tree):
        """Build the epsilon stage for ``tree``; raise RuntimeError for
        unknown types."""
        if self.epsilon != 'default':
            raise RuntimeError(
                'unknown epsilon stage type: %s' % self.epsilon)
        return EpsilonStage(tree)

    def read_text(self, text):
        """Parse ``text`` and return ``read_sentence`` results, one per
        item returned by the parser."""
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
        sents = self.parser.parse_text(text)
        return [self.read_sentence(Sentence(sent)) for sent in sents]

    def _record_output(self, stage_name, tree):
        """Store ``str(tree)`` as ``<stage_name>_output`` and debug-print it."""
        rendered = str(tree)
        setattr(self, '%s_output' % stage_name, rendered)
        self.debug_msg(rendered)

    def read_sentence(self, sentence):
        """Run ``sentence`` through the alpha..epsilon stages in order.

        Returns:
            The tree produced by the epsilon stage.

        Raises:
            RuntimeError: if any stage type is not recognised.
        """
        self.debug_msg('parsing sentence: %s' % sentence)
        if self.debug:
            sentence.print_tree()

        # Alpha is the only stage that consumes the sentence itself.
        alpha_stage = self.create_alpha_stage()
        self.debug_msg('executing alpha stage...')
        tree = alpha_stage.process_sentence(sentence)
        self._record_output('alpha', tree)

        # The remaining stages share one shape: build from the current
        # tree, process, record.  The public create_*_stage methods are
        # looked up by name so subclass overrides keep working.
        for stage_name in ('beta', 'gamma', 'delta', 'epsilon'):
            stage = getattr(self, 'create_%s_stage' % stage_name)(tree)
            self.debug_msg('executing %s stage...' % stage_name)
            tree = stage.process()
            self._record_output(stage_name, tree)

        return tree
def process_sentence(self, sentence):
    """Process ``sentence`` from its root token and return ``self.tree``.

    The value returned by ``self.process_token`` for the root token becomes
    the tree's ``root_id``; redundant nesting is then removed from the tree.

    NOTE(review): takes ``self`` -- presumably a method of a class whose
    header lies outside this view.
    """
    root_token = sentence.root()
    self.tree.root_id = self.process_token(root_token)
    self.tree.remove_redundant_nesting()
    return self.tree


def transform(sentence):
    """Run a fresh ``AlphaStage`` on ``sentence`` and return its result."""
    return AlphaStage().process_sentence(sentence)


if __name__ == "__main__":
    # Manual smoke test: parse a sample text and print each sentence, its
    # parse tree and the transformed result.
    test_text = """ My name is James Bond. """

    print("Starting parser...")
    parser = Parser()
    print("Parsing...")
    result = parser.parse_text(test_text)
    print(result)

    for parsed in result:
        sentence = Sentence(parsed)
        print(sentence)
        sentence.print_tree()
        print(transform(sentence))
def create_parser(lang='en'):
    """Return a new ``Parser`` constructed with ``lang`` (default 'en')."""
    parser = Parser(lang)
    return parser
class Reader(object):
    """Read text by running each parsed sentence through named stages.

    The stages listed in ``stages`` run in order: the first receives the
    sentence, each later one is built from the previous stage's output.
    """

    def __init__(self, hg,
                 stages=('hypergen-forest', 'disamb-naive', 'merge',
                         'shallow', 'concepts'),
                 show_namespaces=False):
        """Store the configuration; the parser is created lazily.

        Args:
            hg: object handed to the disambiguation helper and to the
                'disamb' stage.
            stages: ordered stage names understood by ``create_stage``.
            show_namespaces: forwarded as ``with_namespaces`` when stage
                results are rendered for debug output.
        """
        self.hg = hg
        self.stages = stages
        # Both are created on the first call to read_text().
        self.parser = None
        self.disamb = None
        self.debug = False
        self.aux_text = ''
        self.show_namespaces = show_namespaces

    def create_stage(self, name, output):
        """Build the stage registered under ``name``.

        Args:
            name: stage name.
            output: result of the previous stage (``None`` for the first).

        Raises:
            RuntimeError: if ``name`` is not a known stage name.
        """
        if name == 'hypergen-forest':
            return Hypergen(model_type='rf')
        if name == 'hypergen-nn':
            return Hypergen(model_type='nn')
        if name == 'disamb':
            return Disamb(self.hg, self.parser, self.disamb, output,
                          self.aux_text)
        if name == 'disamb-simple':
            return DisambSimple(output)
        if name == 'disamb-naive':
            return DisambNaive(output)
        if name == 'merge':
            return Merge(output)
        if name == 'shallow':
            return Shallow(output)
        if name == 'concepts':
            return Concepts(output)
        raise RuntimeError('unknown stage name: %s' % name)

    def debug_msg(self, msg):
        """Log ``msg`` at INFO level and also print it in debug mode."""
        logging.info(msg)
        if self.debug:
            print(msg)

    def read_text(self, text, aux_text=None, reset_context=True):
        """Parse ``text`` and read every sentence the parser returns.

        The text is stripped before parsing; the stored context keeps the
        unstripped ``text``.

        Args:
            text: text to parse.
            aux_text: optional extra text appended (after a newline) to
                ``text`` in the stored context.
            reset_context: when true, ``self.aux_text`` is overwritten
                before the sentences are read.

        Returns:
            A list of ``(p[0], output)`` tuples, one per item returned by
            the parser, where ``output`` comes from ``read_sentence``.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
            self.disamb = Disambiguation(self.hg, self.parser)
        nlp_parses = self.parser.parse_text(text.strip())

        # NOTE(review): the aux_text branch is assumed to be nested under
        # reset_context -- the source formatting does not show it.
        if reset_context:
            self.aux_text = text
            if aux_text:
                self.aux_text = '%s\n%s' % (text, aux_text)

        parses = [(p[0], self.read_sentence(Sentence(p[1])))
                  for p in nlp_parses]

        for p in parses:
            self.debug_msg('== extra ==')
            for edg in p[1].edges:
                self.debug_msg(ed.edge2str(edg))

        return parses

    def read_sentence(self, sentence):
        """Run ``sentence`` through every configured stage.

        Returns:
            The last stage's output with ``main_edge`` set from its tree
            and its ``sentence`` and ``tree`` attributes cleared.

        Raises:
            RuntimeError: if ``self.stages`` is empty, or (from
                ``create_stage``) if a stage name is unknown.
        """
        self.debug_msg('parsing sentence: %s' % sentence)
        if self.debug:
            sentence.print_tree()

        last_stage_output = None
        for position, name in enumerate(self.stages):
            stage = self.create_stage(name, last_stage_output)
            self.debug_msg('executing %s stage...' % name)
            # Only the first stage consumes the raw sentence.
            if position == 0:
                last_stage_output = stage.process_sentence(sentence)
            else:
                last_stage_output = stage.process()
            output = last_stage_output.tree.to_hyperedge_str(
                with_namespaces=self.show_namespaces)
            self.debug_msg(output)

        # Without this guard an empty pipeline fails below with an opaque
        # AttributeError on None.
        if last_stage_output is None:
            raise RuntimeError('no stages configured')

        last_stage_output.main_edge = last_stage_output.tree.to_hyperedge()

        # TODO: ugly...
        # The returned object keeps main_edge but no longer references the
        # sentence or the tree.
        last_stage_output.sentence = None
        last_stage_output.tree = None

        return last_stage_output
return elem_id, transf def process_sentence(self, sentence): self.tree.root_id = self.process_token(sentence.root())[0] return ParserOutput(sentence, self.tree) def transform(sentence): alpha = AlphaForest() return alpha.process_sentence(sentence) if __name__ == '__main__': # learn('cases.csv', 'alpha_forest.model') test_text = """ Satellites from NASA and other agencies have been tracking sea ice changes since 1979. """ # test_text = 'Telmo is going to the gym.' print('Starting parser...') parser = Parser() print('Parsing...') result = parser.parse_text(test_text) for r in result: s = Sentence(r[1]) t = transform(s) print(t.tree.to_hyperedge_str(with_namespaces=False))