def test_main(self, mock_remove_entity_overlapping, mock_analyze_sentence):
    """Smoke-test ``main`` with mock data sources and a stubbed parser.

    The test makes no assertions: it passes as long as ``main`` returns
    without raising.

    NOTE(review): the two ``mock_*`` parameters are never used in the body;
    presumably they are injected by ``patch`` decorators above this
    method -- confirm.
    """
    data_sources = [MockDataSource(), MockDataSource()]

    # These catalogs are constructed but never handed to main().
    # NOTE(review): possibly leftovers from an older main() signature --
    # confirm the constructors have no needed side effects before removing.
    bacteria_catalog = MockCatalog("a")
    nutrients_catalog = MockCatalog("b")
    diseases_catalog = MockCatalog("c")
    food_catalog = MockCatalog("d")

    parser = StanfordSentenceParser(
        stanford_dependency_parser=None,
        stanford_tokenizer=False,
    )
    # Stub out parsing so every call to parse_sentence yields ["yeah"].
    parser.parse_sentence = MagicMock(return_value=["yeah"])

    constants.logger.info("asdasd")
    main(
        data_sources,
        parser,
        tokenizer=None,
        pattern_finder=None,
        writers=[],
    )
# --- Sentence finder -------------------------------------------------------
do_nothing_analyzer = DoNothingSentenceAnalyzer()
# NOTE(review): `analyzer` is not referenced again in this section.
analyzer = SentenceAnalyzer()
sentence_finder = SentenceFinder(
    [],
    do_nothing_parser,
    do_nothing_analyzer,
    all_bacteria_catalog,
    [BACTERIA_TAG],
)

# --- Article data sources --------------------------------------------------
nxml_article_data_source = NxmlFreeArticleDataSource(
    articles_folder=nxml_articles_dir,
)
medline_article_data_source = MedlineAbstractsArticleDataSource(
    medline_file=abstracts_dir,
)
libgen_article_data_source = LibgenTxtArticleDataSource(
    libgen_folder=libgen_texts_dir,
)

# --- Pattern finder --------------------------------------------------------
# SECURITY NOTE(review): eval() executes whatever expression the ontology
# file contains. If the file only ever holds a plain Python literal,
# ast.literal_eval would be the safe replacement -- confirm the file format
# before switching.
with open(verb_ontollogy_path) as ontology_file:
    verb_ontology = eval(ontology_file.read())
lancaster_stemmer = LancasterStemmer()
# NOTE(review): `pattern_finder` is built here but not passed to main() below.
pattern_finder = PatternFinder(verb_ontology, lancaster_stemmer)

# The list order (nxml, libgen, medline) differs from the construction order
# above and is kept as-is.
article_data_sources = [
    nxml_article_data_source,
    libgen_article_data_source,
    medline_article_data_source,
]

# --- Writers ---------------------------------------------------------------
output_dir = get_output_dir_path()
csv_path = get_csv_path()
csv_writer = CsvWriter(csv_path)
# NOTE(review): `pkl_writer` is constructed but is not among the writers
# given to main() below.
pkl_writer = PklWriter(output_dir)
log_writer = LogWriter()

# --- Run -------------------------------------------------------------------
main(
    article_data_sources,
    writers=[csv_writer, log_writer],
    sentence_finder=sentence_finder,
    data_sources_to_skip=data_sources_to_skip_number,
    sentences_to_skip=sentences_to_skip_number,
)