def run(pipeline):
    '''Run a complete pipeline of graph operations.

    Parameters
    ----------
    pipeline : dict
        The pipeline description.

    Returns
    -------
    The result from running the pipeline with the provided arguments.
    '''
    # INPUT: either a ready-made graph, or a text to transform into one
    if 'graph' in pipeline:
        graph = pipeline['graph']
    elif 'text' in pipeline:
        # The transformer pipeline starts with the parser step, followed by
        # the requested (or default) transformer modules. dict.get with a
        # default never raises KeyError, so only get_pipeline needs guarding.
        ts = [pipeline.get('parser', DEF_PARSER) + '_parse']
        ts += pipeline.get('transformers', DEF_TRANSFORMERS)
        try:
            T = transformers.get_pipeline(ts)
        except KeyError:
            raise ValueError("Unknown transformer pipeline")
        T_args = pipeline.get('transformer_args', DEF_T_ARGS)
        graph = CG(transformer=T, transformer_args=T_args,
                   text=pipeline['text'])
    else:
        raise ValueError('Must provide either graph or text')
    # OPERATIONS: apply each requested operation to the graph in turn
    for operation in pipeline.get('operations', []):
        try:
            name = operation.pop('op')
        except KeyError:
            raise ValueError("No name for the operation")
        try:
            graph = operate(graph, name, **operation)
        except TypeError as e:
            raise ValueError(e)
        # Restore the popped key so the caller's dict is left unchanged
        operation['op'] = name
    # OUTPUT: linearize the graph back to text, or return it as-is
    if 'linearizers' in pipeline:
        try:
            L = linearizers.get_pipeline(pipeline['linearizers'])
        except KeyError:
            raise ValueError("Unknown linearizer pipeline")
        L_args = pipeline.get('linearizer_args', DEF_L_ARGS)
        return graph.linearize(linearizer=L, linearizer_args=L_args)
    else:
        return graph
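A whole run can thus be described with a single dict: text in, transformed to a graph, optionally modified via an 'operations' list of dicts (each naming its operation under 'op'), and linearized back out. A minimal usage sketch, with module names borrowed from the other snippets in this section rather than guaranteed by the API:

# Hypothetical pipeline description; the transformer and linearizer names
# are assumptions drawn from the examples below.
result = run({
    'text': 'A man picked up an apple.',
    'parser': 'spacy',
    'transformers': ['pos_extract', 'thematic', 'phrasal'],
    'linearizers': ['simple_nlg'],
})
print(result)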
def __init__(self, text, superficial=False, autocorrect=True):
    # Correct possibly misspelled words in the query
    if autocorrect:
        self.text = correct_phrase(text)
    else:
        self.text = text
    if superficial:
        # Parse the query without generating the semantic graph
        self.parse = nlp(self.text)
    else:
        # Deep-parse the query, generating the semantic graph
        self.graph = CG(transformer=semantic_analyzer, text=self.text)
        self.parse = self.graph.spacy_parse
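The compose_answer snippet below reads question.graph, so this constructor presumably belongs to a question-wrapper class; it is called Question in the following sketch purely for illustration:

# 'Question' is a stand-in name for whatever class owns this __init__.
q = Question('Who wrote Hamlet?')                # autocorrected, deep parse
print(q.graph)                                   # semantic graph available
shallow = Question('Who wrote Hamlet?', superficial=True)
print(shallow.parse)                             # spaCy parse only, no graph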
def operate(graph, **args):
    # Spot the domain of the graph, then return a new graph restricted to
    # that subgraph, recording its main entity as a 'gram' attribute
    subgraph, main_entity = spot_domain(graph)
    r = CG(graph, subgraph=subgraph)
    r.gram['main_entity'] = main_entity
    return r
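This operate is a single operation's entry point, whereas run above calls a generic operate(graph, name, **args). Presumably the generic form dispatches to the module of that name; a rough sketch of such a dispatcher, assuming operation modules live under grafeno.operations:

import importlib

def operate_by_name(graph, name, **args):
    # Hypothetical dispatcher: resolve the operation module by name and
    # delegate to its operate(graph, **args) entry point.
    module = importlib.import_module('grafeno.operations.' + name)
    return module.operate(graph, **args)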
def compose_answer(question, answer_graph):
    # Copy the question graph, graft the answer graph onto the node that
    # stands for the questioned element, and linearize the result
    g = CG(original=question.graph)
    graft(g, question.graph.questions[0], answer_graph, answer_graph.roots[0])
    return g.linearize(linearizer=nlg)
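A usage sketch; both the question wrapper and the knowledge-base lookup that produces the answer graph are hypothetical stand-ins for the surrounding system:

# Hypothetical: 'Question' and 'lookup_answer' are illustrative names.
question = Question('Who wrote Hamlet?')
answer_graph = lookup_answer(question.graph)
print(compose_answer(question, answer_graph))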
#!/usr/bin/env python3

from grafeno import Graph as CG, transformers as tr, linearizers as ln
import grafeno.operations.generalize as gen

# A transformer pipeline to build semantic graphs from English text, and
# a linearizer pipeline to turn graphs back into text
T = tr.get_pipeline(['pos_extract', 'thematic', 'phrasal', 'wordnet'])
L = ln.get_pipeline(['simple_nlg'])

g1 = CG(transformer=T, text="A man picked up an apple.")
g2 = CG(transformer=T, text="The woman lifted some pears.")

# Generalize the two graphs, abstracting paired nodes via WordNet
g3 = gen.generalize(g1, g2, node_generalize=gen.wordnet_generalize)

print(g3.linearize(linearizer=L))
import argparse

# Import paths assumed from the library layout used in the other examples
from grafeno import Graph as CG, transformers as tr
from grafeno.operations import operate

arg_parser = argparse.ArgumentParser(
    description='Example topic detection script')
arg_parser.add_argument('file', type=argparse.FileType('r'))
arg_parser.add_argument('-l', '--lang', help='language of the text',
                        default='en')
args = arg_parser.parse_args()

# Build the semantic graph, keeping noun concepts and linking them to
# their WordNet hypernyms with sentence-spanning HYP edges
T = tr.get_pipeline(
    ['spacy_parse', 'pos_extract', 'thematic', 'phrasal', 'wordnet'])
g = CG(transformer=T, transformer_args={
    'lang': args.lang,
    'sempos': {'noun': 'n'},
    'unique_gram': {'hyper': [True]},
    'extended_sentence_edges': ['HYP']
}, text=args.file.read())

# Cluster the graph; the hub vertex sets (HVS) of the clusters give the topics
g = operate(g, 'cluster', hubratio=0.2)

for hvs in g.gram['HVS']:
    keywords = set(g.node[n]['concept'] for n in hvs)
    print('Topic: {}'.format(', '.join(keywords)))
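Saved as, say, topics.py (the file name is hypothetical), the script reads a plain-text file and prints one 'Topic:' line per detected cluster, e.g. python topics.py article.txt --lang en.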