def setup(config: Config) -> Pipeline:
    resource = Resources()
    query_pipeline = Pipeline[MultiPack](resource=resource)
    query_pipeline.set_reader(
        reader=MultiPackTerminalReader(), config=config.reader)
    query_pipeline.add(
        component=MicrosoftBingTranslator(), config=config.translator)
    query_pipeline.add(
        component=BertBasedQueryCreator(), config=config.query_creator)
    query_pipeline.add(
        component=SearchProcessor(), config=config.searcher)

    # The top retrieved document lands in a pack named
    # "<response_pack_name>_0"; route the downstream NLP components to
    # that pack only, via a NameMatchSelector.
    top_response_pack_name = config.indexer.response_pack_name + '_0'

    query_pipeline.add(
        component=NLTKSentenceSegmenter(),
        selector=NameMatchSelector(select_name=top_response_pack_name))
    query_pipeline.add(
        component=NLTKWordTokenizer(),
        selector=NameMatchSelector(select_name=top_response_pack_name))
    query_pipeline.add(
        component=NLTKPOSTagger(),
        selector=NameMatchSelector(select_name=top_response_pack_name))
    query_pipeline.add(
        component=SRLPredictor(), config=config.SRL,
        selector=NameMatchSelector(select_name=top_response_pack_name))

    # Translate the response back to the user's language.
    query_pipeline.add(
        component=MicrosoftBingTranslator(), config=config.back_translator)

    query_pipeline.initialize()
    return query_pipeline
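# A minimal driver sketch for setup() above, assuming the `Config` wrapper
# from the signature accepts the parsed YAML dict (that constructor call is
# an assumption, not confirmed by the source). main() later in this section
# does the same thing against the older add_processor API; this only shows
# how setup() would be invoked.
import yaml

def run_interactive() -> None:  # hypothetical helper name
    with open("config.yml", "r") as f:
        config = Config(yaml.safe_load(f))  # assumed Config constructor
    pipeline = setup(config)
    # MultiPackTerminalReader reads queries from the terminal, so each
    # iteration of process_dataset() is one conversation turn.
    for m_pack in pipeline.process_dataset():
        print(m_pack.get_pack(config.back_translator.in_pack_name).text)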
def test_pipeline(self, texts):
    # Write each input text to its own file so the directory-based reader
    # can pick them up.
    for idx, text in enumerate(texts):
        file_path = os.path.join(self.test_dir, f"{idx + 1}.txt")
        with open(file_path, 'w') as f:
            f.write(text)

    nlp = Pipeline()
    reader_config = HParams(
        {
            "input_pack_name": "input",
            "output_pack_name": "output"
        },
        MultiPackSentenceReader.default_hparams())
    nlp.set_reader(reader=MultiPackSentenceReader(), config=reader_config)

    translator_config = HParams(
        {
            "src_language": "de",
            "target_language": "en",
            "in_pack_name": "input",
            "out_pack_name": "result"
        },
        None)
    nlp.add_processor(MicrosoftBingTranslator(), config=translator_config)
    nlp.initialize()

    english_results = ["Hey good morning", "This is Forte. A tool for NLP"]
    for idx, m_pack in enumerate(nlp.process_dataset(self.test_dir)):
        # The multi-pack should hold the reader's two packs plus the
        # translator's output pack.
        self.assertEqual(set(m_pack._pack_names),
                         set(["input", "output", "result"]))
        self.assertEqual(
            m_pack.get_pack("result").text, english_results[idx] + "\n")
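# The test above assumes a `self.test_dir` scratch directory. A fixture
# sketch like the following (hypothetical class name, standard unittest
# machinery) would provide and clean it up:
import shutil
import tempfile
import unittest

class MicrosoftBingTranslatorTest(unittest.TestCase):  # assumed name
    def setUp(self):
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir, ignore_errors=True)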
def main():
    config = yaml.safe_load(open("config.yml", "r"))
    config = HParams(config, default_hparams=None)

    resource = Resources()
    query_pipeline = Pipeline(resource=resource)
    query_pipeline.set_reader(
        reader=MultiPackTerminalReader(), config=config.reader)
    query_pipeline.add_processor(
        processor=MicrosoftBingTranslator(), config=config.translator)
    query_pipeline.add_processor(
        processor=BertBasedQueryCreator(), config=config.query_creator)
    query_pipeline.add_processor(
        processor=SearchProcessor(), config=config.indexer)
    query_pipeline.add_processor(
        processor=NLTKSentenceSegmenter(),
        selector=NameMatchSelector(
            select_name=config.indexer.response_pack_name[0]))
    query_pipeline.add_processor(
        processor=NLTKWordTokenizer(),
        selector=NameMatchSelector(
            select_name=config.indexer.response_pack_name[0]))
    query_pipeline.add_processor(
        processor=NLTKPOSTagger(),
        selector=NameMatchSelector(
            select_name=config.indexer.response_pack_name[0]))
    query_pipeline.add_processor(
        processor=SRLPredictor(), config=config.SRL,
        selector=NameMatchSelector(
            select_name=config.indexer.response_pack_name[0]))
    query_pipeline.add_processor(
        processor=MicrosoftBingTranslator(), config=config.back_translator)
    query_pipeline.initialize()

    for m_pack in query_pipeline.process_dataset():
        # Update the resource so the conversation history is available
        # in the next turn.
        query_pack = m_pack.get_pack(config.translator.in_pack_name)
        if resource.get("user_utterance"):
            resource.get("user_utterance").append(query_pack)
        else:
            resource.update(user_utterance=[query_pack])

        response_pack = m_pack.get_pack(config.back_translator.in_pack_name)
        if resource.get("bot_utterance"):
            resource.get("bot_utterance").append(response_pack)
        else:
            resource.update(bot_utterance=[response_pack])

        english_pack = m_pack.get_pack("pack")
        print(colored("English Translation of the query: ", "green"),
              english_pack.text, "\n")

        pack = m_pack.get_pack(config.indexer.response_pack_name[0])
        print(colored("Retrieved Document", "green"), pack.text, "\n")
        print(colored("German Translation", "green"),
              m_pack.get_pack("response").text, "\n")

        for sentence in pack.get(Sentence):
            sent_text = sentence.text
            print(colored("Sentence:", 'red'), sent_text, "\n")
            print(colored("Semantic role labels:", 'red'))
            for link in pack.get(PredicateLink, sentence):
                parent = link.get_parent()
                child = link.get_child()
                print(f"  - \"{child.text}\" is role {link.arg_type} of "
                      f"predicate \"{parent.text}\"")
            print()

        input(colored("Press ENTER to continue...\n", 'green'))
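# Standard entry-point guard so the module can be imported without side
# effects and still run as a script.
if __name__ == "__main__":
    main()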