def get_extraction_runner():
    """Create an ExtractionRunner with logging enabled and all runnables registered.

    The runnables are added in the order listed below and the configured
    runner is returned to the caller.
    """
    extraction_runner = ExtractionRunner()
    # NOTE(review): these paths start with '~'; presumably enable_logging
    # expands the home directory itself -- confirm against its implementation.
    extraction_runner.enable_logging('~/logs/results', '~/logs/runnables')

    # Registration order is kept exactly as in the original pipeline.
    pipeline = (
        pdfbox.PDFBoxPlainTextExtractor,
        filters.AcademicPaperFilter,
        grobid.GrobidHeaderTEIExtractor,
        tei.TEItoHeaderExtractor,
        parscit.ParsCitCitationExtractor,
        figures.PDFFiguresExtractor,
        algorithms.AlgorithmsExtractor,
    )
    for runnable_cls in pipeline:
        extraction_runner.add_runnable(runnable_cls)

    return extraction_runner
def test_disable_logs_works(self):
    """Check that no log files are written once logging has been disabled.

    Logging is enabled and then immediately disabled before a run is
    executed; afterwards neither log prefix may match any ``*.log`` file.
    """
    runner = ExtractionRunner()
    results_prefix = os.path.join(self.results_dir, 'results')
    runnables_prefix = os.path.join(self.results_dir, 'runnables')

    # Turn logging on and straight back off: the second call must win.
    runner.enable_logging(results_prefix, runnables_prefix)
    runner.disable_logging()

    runner.add_runnable(SelfLogExtractor)
    runner.run('abc', output_dir=self.results_dir, run_name='RUN!')

    # Results log is checked first, then the runnables log.
    for prefix in (results_prefix, runnables_prefix):
        self.assertFalse(glob.glob(prefix + "*.log"))
def get_extraction_runner():
    """Build and return the ExtractionRunner used for extraction.

    Logging is enabled first, then each runnable class is registered in a
    fixed order via ``add_runnable``.
    """
    configured = ExtractionRunner()
    # NOTE(review): the log paths use '~'; presumably enable_logging resolves
    # it to the user's home directory -- verify.
    configured.enable_logging('~/logs/results', '~/logs/runnables')

    # Runnables in registration order (unchanged from the original sequence).
    runnable_classes = [
        pdfbox.PDFBoxPlainTextExtractor,
        filters.AcademicPaperFilter,
        grobid.GrobidHeaderTEIExtractor,
        tei.TEItoHeaderExtractor,
        parscit.ParsCitCitationExtractor,
        figures.PDFFiguresExtractor,
        algorithms.AlgorithmsExtractor,
    ]
    for runnable_class in runnable_classes:
        configured.add_runnable(runnable_class)

    return configured
def test_disable_logs_works(self):
    """Verify that disable_logging suppresses both the results and runnables logs.

    The runner has logging enabled and then disabled before running
    SelfLogExtractor; no ``*.log`` file may exist for either log prefix.
    """
    runner = ExtractionRunner()
    log_prefixes = {
        'results': os.path.join(self.results_dir, 'results'),
        'runnables': os.path.join(self.results_dir, 'runnables'),
    }

    runner.enable_logging(log_prefixes['results'], log_prefixes['runnables'])
    runner.disable_logging()
    runner.add_runnable(SelfLogExtractor)
    runner.run('abc', output_dir=self.results_dir, run_name='RUN!')

    # An empty glob result is falsy, so assertFalse passes only when no
    # log file was produced.
    leftover_results_logs = glob.glob(log_prefixes['results'] + "*.log")
    self.assertFalse(leftover_results_logs)

    leftover_runnables_logs = glob.glob(log_prefixes['runnables'] + "*.log")
    self.assertFalse(leftover_runnables_logs)
def get_extraction_runner():
    """Create an ExtractionRunner configured with the Grobid TEI pipeline.

    Logging is enabled, the "Option 1" runnables are registered in order,
    followed by the academic paper filter, and the runner is returned.
    """
    extraction_runner = ExtractionRunner()
    # NOTE(review): paths begin with '~'; presumably enable_logging expands
    # it -- confirm.
    extraction_runner.enable_logging('~/logs/results', '~/logs/runnables')

    # Option 1
    option_one = (
        grobid.GrobidTEIExtractor,
        extractors.TEItoPlainTextExtractor,
        extractors.TEItoHeaderExtractor,
    )
    for runnable_cls in option_one:
        extraction_runner.add_runnable(runnable_cls)
    # OR
    # Option 2
    # extraction_runner.add_runnable(pdfbox.PDFBoxPlainTextExtractor)

    # NOTE(review): the filter is registered after the extractors above, as
    # in the original statement order -- confirm this ordering is intended.
    extraction_runner.add_runnable(filters.AcademicPaperFilter)

    return extraction_runner
def test_logs_work(self):
    """Check that a run with logging enabled writes both log files.

    After running SelfLogExtractor on the input 'abc' with run name 'RUN!':

    * the results log must contain '[SUCCESS]' and the run name;
    * the runnables log must contain the input data, the runnable's name
      and the run name.
    """
    runner = ExtractionRunner()
    results_log_path = os.path.join(self.results_dir, 'results')
    runnables_log_path = os.path.join(self.results_dir, 'runnables')
    runner.enable_logging(results_log_path, runnables_log_path)
    runner.add_runnable(SelfLogExtractor)
    runner.run('abc', output_dir=self.results_dir, run_name='RUN!')

    results_logs = glob.glob(results_log_path + "*.log")
    # Report a missing log as a test failure rather than an IndexError.
    self.assertTrue(results_logs, 'no results log file was written')
    # Use a context manager so the file handle is closed deterministically.
    with open(results_logs[0], 'r') as log_file:
        log_data = log_file.read()
    self.assertIn('[SUCCESS]', log_data)
    self.assertIn('RUN!', log_data)

    runnables_logs = glob.glob(runnables_log_path + "*.log")
    self.assertTrue(runnables_logs, 'no runnables log file was written')
    with open(runnables_logs[0], 'r') as log_file:
        log_data = log_file.read()
    self.assertIn('abc', log_data)
    self.assertIn('SelfLogExtractor', log_data)
    self.assertIn('RUN!', log_data)
def test_logs_work(self):
    """Verify that enabling logging produces populated results and runnables logs.

    SelfLogExtractor is run on the input 'abc' under the run name 'RUN!'.
    The results log is expected to contain '[SUCCESS]' and 'RUN!'; the
    runnables log is expected to contain 'abc', 'SelfLogExtractor' and
    'RUN!'.
    """
    runner = ExtractionRunner()
    results_log_path = os.path.join(self.results_dir, 'results')
    runnables_log_path = os.path.join(self.results_dir, 'runnables')
    runner.enable_logging(results_log_path, runnables_log_path)
    runner.add_runnable(SelfLogExtractor)
    runner.run('abc', output_dir=self.results_dir, run_name='RUN!')

    # (log path prefix, substrings that must appear in the first matching log)
    expectations = (
        (results_log_path, ('[SUCCESS]', 'RUN!')),
        (runnables_log_path, ('abc', 'SelfLogExtractor', 'RUN!')),
    )
    for log_prefix, expected_fragments in expectations:
        matching_logs = glob.glob(log_prefix + "*.log")
        # Fail with a readable message instead of an IndexError when the
        # run produced no log file for this prefix.
        self.assertTrue(matching_logs,
                        'no log file found for prefix %r' % log_prefix)
        # The context manager closes the handle even if a later assertion fails.
        with open(matching_logs[0], 'r') as log_file:
            log_data = log_file.read()
        for fragment in expected_fragments:
            self.assertIn(fragment, log_data)