def test_cli_extract_from_url(self):
    """bibclassify -k ${taxonomy}.rdf {url/record/94}

    Parse the equivalent command line with
    ``bibclassify_cli._read_options``, run
    ``bibclassify_engine.output_keywords_for_sources`` on the URL returned
    by ``self.get_test_file(94)`` and validate the captured output with
    ``check_pdf2``. The test fails with the message produced by
    ``check_pdf2`` when the check does not pass.
    """
    # Only the URL half of the (path, url) pair is used by this test.
    _, url = self.get_test_file(94)

    # Build argv as a list so that whitespace inside a value cannot split
    # one argument into several.
    args = ["-k", "%s.rdf" % self.taxonomy_name, url]
    options = bibclassify_cli._read_options(args)

    self.redirect()
    try:
        bibclassify_engine.output_keywords_for_sources(
            options["text_files"],
            options["taxonomy"],
            rebuild_cache=options["rebuild_cache"],
            no_cache=options["no_cache"],
            output_mode=options["output_mode"],
            output_limit=options["output_limit"],
            spires=options["spires"],
            match_mode=options["match_mode"],
            with_author_keywords=options["with_author_keywords"],
            extract_acronyms=options["extract_acronyms"],
            only_core_tags=options["only_core_tags"],
        )
    finally:
        # Always undo redirect(), even if the engine raises, so the
        # redirection (presumably of stdout/stderr -- confirm against
        # the helper) does not leak into later tests.
        results, _ = self.unredirect()

    res, msg = check_pdf2(results)
    if not res:
        self.fail(msg)
def test_cli_extract_from_directory(self):
    """bibclassify -k ${taxonomy}.rdf directory/

    Take the directory containing the file returned by
    ``self.get_test_file(94)``, pass it as the source argument to
    ``bibclassify_engine.output_keywords_for_sources`` and validate the
    captured output with ``check_pdf2``. When the directory does not
    exist, a notice is written to stderr and the test returns without
    asserting anything.
    """
    # Only the local path half of the (path, url) pair is used here.
    path, _ = self.get_test_file(94)
    path = os.path.dirname(path)
    if not os.path.exists(path):
        sys.stderr.write("No PDF folder for testing found, returning\n")
        return

    # Build argv as a list so that a directory name containing whitespace
    # is still passed as a single argument.
    args = ["-k", "%s.rdf" % self.taxonomy_name, path]
    options = bibclassify_cli._read_options(args)

    self.redirect()
    try:
        bibclassify_engine.output_keywords_for_sources(
            options["text_files"],
            options["taxonomy"],
            rebuild_cache=options["rebuild_cache"],
            no_cache=options["no_cache"],
            output_mode=options["output_mode"],
            output_limit=options["output_limit"],
            spires=options["spires"],
            match_mode=options["match_mode"],
            with_author_keywords=options["with_author_keywords"],
            extract_acronyms=options["extract_acronyms"],
            only_core_tags=options["only_core_tags"],
        )
    finally:
        # Always undo redirect(), even if the engine raises, so the
        # redirection (presumably of stdout/stderr -- confirm against
        # the helper) does not leak into later tests.
        results, _ = self.unredirect()

    res, msg = check_pdf2(results)
    if not res:
        self.fail(msg)
def test_cli_extract_from_url(self):
    """bibclassify -k ${taxonomy}.rdf {url/record/94}

    Run keyword extraction against the URL returned by
    ``self.get_test_file(94)``: the options come from
    ``bibclassify_cli._read_options``, the extraction is done by
    ``bibclassify_engine.output_keywords_for_sources`` and the captured
    output is checked with ``check_pdf2``, whose message is used as the
    failure reason.
    """
    # The local path returned alongside the URL is not needed here.
    _, url = self.get_test_file(94)

    # An explicit argument list avoids re-splitting values on whitespace.
    args = ["-k", "%s.rdf" % self.taxonomy_name, url]
    options = bibclassify_cli._read_options(args)

    self.redirect()
    try:
        bibclassify_engine.output_keywords_for_sources(
            options["text_files"],
            options["taxonomy"],
            rebuild_cache=options["rebuild_cache"],
            no_cache=options["no_cache"],
            output_mode=options["output_mode"],
            output_limit=options["output_limit"],
            spires=options["spires"],
            match_mode=options["match_mode"],
            with_author_keywords=options["with_author_keywords"],
            extract_acronyms=options["extract_acronyms"],
            only_core_tags=options["only_core_tags"],
        )
    finally:
        # unredirect() must run on the error path too; otherwise the
        # state set up by redirect() would outlive this test.
        # NOTE(review): presumably this restores stdout/stderr -- confirm.
        results, _ = self.unredirect()

    res, msg = check_pdf2(results)
    if not res:
        self.fail(msg)
def test_cli_extract_from_directory(self):
    """bibclassify -k ${taxonomy}.rdf directory/

    Run keyword extraction with a directory as the source: the directory
    is the parent of the file returned by ``self.get_test_file(94)``.
    The captured output is checked with ``check_pdf2``. If the directory
    is missing, the test writes a notice to stderr and returns early.
    """
    # The URL returned alongside the path is not needed here.
    path, _ = self.get_test_file(94)
    path = os.path.dirname(path)
    if not os.path.exists(path):
        sys.stderr.write("No PDF folder for testing found, returning\n")
        return

    # An explicit argument list keeps a path with spaces in one piece.
    args = ["-k", "%s.rdf" % self.taxonomy_name, path]
    options = bibclassify_cli._read_options(args)

    self.redirect()
    try:
        bibclassify_engine.output_keywords_for_sources(
            options["text_files"],
            options["taxonomy"],
            rebuild_cache=options["rebuild_cache"],
            no_cache=options["no_cache"],
            output_mode=options["output_mode"],
            output_limit=options["output_limit"],
            spires=options["spires"],
            match_mode=options["match_mode"],
            with_author_keywords=options["with_author_keywords"],
            extract_acronyms=options["extract_acronyms"],
            only_core_tags=options["only_core_tags"],
        )
    finally:
        # unredirect() must run on the error path too; otherwise the
        # state set up by redirect() would outlive this test.
        # NOTE(review): presumably this restores stdout/stderr -- confirm.
        results, _ = self.unredirect()

    res, msg = check_pdf2(results)
    if not res:
        self.fail(msg)
def test_full_and_partial_matching_mode(self):
    """bibclassify - difference of extraction on part or full contents of pdf

    Run the extraction twice on the file returned by
    ``self.get_test_file(94)``: once with the default options and once
    with ``-m partial``. The output of the ``-m partial`` run is checked
    with ``check_pdf1`` and the output of the default run with
    ``check_pdf2``. If the file does not exist, a notice is written to
    stderr and the test returns without asserting anything.
    """
    # Only the local path half of the (path, url) pair is used here.
    path, _ = self.get_test_file(94)
    if not os.path.exists(path):
        sys.stderr.write("No PDF for testing found, returning\n")
        return

    # Build argv as lists so that a path containing whitespace is still
    # passed as a single argument.
    base_args = ["-k", "%s.rdf" % self.taxonomy_name, path]
    cases = [
        base_args,
        base_args + ["-m", "partial"],
    ]

    results = []
    for args in cases:
        options = bibclassify_cli._read_options(args)
        self.redirect()
        try:
            bibclassify_engine.output_keywords_for_sources(
                options["text_files"],
                options["taxonomy"],
                rebuild_cache=options["rebuild_cache"],
                no_cache=options["no_cache"],
                output_mode=options["output_mode"],
                output_limit=options["output_limit"],
                spires=options["spires"],
                match_mode=options["match_mode"],
                with_author_keywords=options["with_author_keywords"],
                extract_acronyms=options["extract_acronyms"],
                only_core_tags=options["only_core_tags"],
            )
        finally:
            # Always undo redirect(), even if the engine raises, so the
            # redirection (presumably of stdout/stderr -- confirm against
            # the helper) does not leak into later tests.
            captured, _ = self.unredirect()
        results.append(captured)

    # results[0] is the default run, results[1] the "-m partial" run.
    res, msg = check_pdf1(results[1])
    if not res:
        self.fail(msg)

    res, msg = check_pdf2(results[0])
    if not res:
        self.fail(msg)
def test_full_and_partial_matching_mode(self):
    """bibclassify - difference of extraction on part or full contents of pdf

    Compare two extraction runs over the file returned by
    ``self.get_test_file(94)``: a default run, whose output must satisfy
    ``check_pdf2``, and a ``-m partial`` run, whose output must satisfy
    ``check_pdf1``. If the file is missing, the test writes a notice to
    stderr and returns early.
    """
    # The URL returned alongside the path is not needed here.
    path, _ = self.get_test_file(94)
    if not os.path.exists(path):
        sys.stderr.write("No PDF for testing found, returning\n")
        return

    # Explicit argument lists keep a path with spaces in one piece.
    default_args = ["-k", "%s.rdf" % self.taxonomy_name, path]
    partial_args = default_args + ["-m", "partial"]

    results = []
    for args in (default_args, partial_args):
        options = bibclassify_cli._read_options(args)
        self.redirect()
        try:
            bibclassify_engine.output_keywords_for_sources(
                options["text_files"],
                options["taxonomy"],
                rebuild_cache=options["rebuild_cache"],
                no_cache=options["no_cache"],
                output_mode=options["output_mode"],
                output_limit=options["output_limit"],
                spires=options["spires"],
                match_mode=options["match_mode"],
                with_author_keywords=options["with_author_keywords"],
                extract_acronyms=options["extract_acronyms"],
                only_core_tags=options["only_core_tags"],
            )
        finally:
            # unredirect() must run on the error path too; otherwise the
            # state set up by redirect() would outlive this test.
            # NOTE(review): presumably restores stdout/stderr -- confirm.
            captured, _ = self.unredirect()
        results.append(captured)

    # Same check order as before: the partial run first, then the default.
    res, msg = check_pdf1(results[1])
    if not res:
        self.fail(msg)

    res, msg = check_pdf2(results[0])
    if not res:
        self.fail(msg)