Exemple #1
0
    def test_cli_extract_from_url(self):
        """bibclassify -k ${taxonomy}.rdf {url/record/94}

        Parses a ``-k <taxonomy>.rdf <url>`` command line for test file 94,
        runs the keyword extraction engine with the parsed options and
        fails unless ``check_pdf2`` accepts the output handed back by
        ``self.unredirect()``.
        """
        # Only the URL half of the fixture is used by this test.
        _path, url = self.get_test_file(94)

        # Build argv as a list instead of formatting one string and calling
        # .split(): a taxonomy name or URL containing whitespace would
        # otherwise be broken into several arguments.
        args = ["-k", "%s.rdf" % self.taxonomy_name, url]
        options = bibclassify_cli._read_options(args)

        self.redirect()
        try:
            bibclassify_engine.output_keywords_for_sources(
                options["text_files"],
                options["taxonomy"],
                rebuild_cache=options["rebuild_cache"],
                no_cache=options["no_cache"],
                output_mode=options["output_mode"],
                output_limit=options["output_limit"],
                spires=options["spires"],
                match_mode=options["match_mode"],
                with_author_keywords=options["with_author_keywords"],
                extract_acronyms=options["extract_acronyms"],
                only_core_tags=options["only_core_tags"])
        finally:
            # Pair every redirect() with an unredirect(), even when the
            # engine raises. NOTE(review): presumably these helpers swap
            # sys.stdout/sys.stderr for capture buffers -- confirm.
            results, _errors = self.unredirect()

        res, msg = check_pdf2(results)
        if not res:
            self.fail(msg)
Exemple #2
0
    def test_cli_extract_from_directory(self):
        """bibclassify -k ${taxonomy}.rdf directory/

        Runs the keyword extraction engine on the directory that contains
        test file 94 and fails unless ``check_pdf2`` accepts the output
        handed back by ``self.unredirect()``. If that directory does not
        exist the test writes a note to stderr and returns without
        asserting anything.
        """
        # Only the local path half of the fixture is used by this test.
        path, _url = self.get_test_file(94)

        path = os.path.dirname(path)

        if not os.path.exists(path):
            sys.stderr.write("No PDF folder for testing found, returning\n")
            return

        # Build argv as a list instead of formatting one string and calling
        # .split(): a directory path containing whitespace would otherwise
        # be broken into several arguments.
        args = ["-k", "%s.rdf" % self.taxonomy_name, path]
        options = bibclassify_cli._read_options(args)

        self.redirect()
        try:
            bibclassify_engine.output_keywords_for_sources(
                options["text_files"],
                options["taxonomy"],
                rebuild_cache=options["rebuild_cache"],
                no_cache=options["no_cache"],
                output_mode=options["output_mode"],
                output_limit=options["output_limit"],
                spires=options["spires"],
                match_mode=options["match_mode"],
                with_author_keywords=options["with_author_keywords"],
                extract_acronyms=options["extract_acronyms"],
                only_core_tags=options["only_core_tags"])
        finally:
            # Pair every redirect() with an unredirect(), even when the
            # engine raises. NOTE(review): presumably these helpers swap
            # sys.stdout/sys.stderr for capture buffers -- confirm.
            results, _errors = self.unredirect()

        res, msg = check_pdf2(results)
        if not res:
            self.fail(msg)
    def test_cli_extract_from_url(self):
        """bibclassify -k ${taxonomy}.rdf {url/record/94}

        Feeds the URL of test file 94 through the CLI option parser and the
        keyword extraction engine, then validates whatever
        ``self.unredirect()`` returns as its first value with
        ``check_pdf2``.
        """
        # The local path returned alongside the URL is not needed here.
        _path, url = self.get_test_file(94)

        # An explicit argument list keeps each value in one piece; the
        # previous "format then .split()" approach would tear apart any
        # value that contains whitespace.
        args = ["-k", "%s.rdf" % self.taxonomy_name, url]
        options = bibclassify_cli._read_options(args)

        self.redirect()
        try:
            bibclassify_engine.output_keywords_for_sources(
                options["text_files"],
                options["taxonomy"],
                rebuild_cache=options["rebuild_cache"],
                no_cache=options["no_cache"],
                output_mode=options["output_mode"],
                output_limit=options["output_limit"],
                spires=options["spires"],
                match_mode=options["match_mode"],
                with_author_keywords=options["with_author_keywords"],
                extract_acronyms=options["extract_acronyms"],
                only_core_tags=options["only_core_tags"],
            )
        finally:
            # unredirect() must run even if the engine raises, so the
            # redirect() above is never left dangling.
            # NOTE(review): assumes redirect()/unredirect() capture and
            # restore stdout/stderr -- verify against the test base class.
            results, _errors = self.unredirect()

        res, msg = check_pdf2(results)
        if not res:
            self.fail(msg)
    def test_cli_extract_from_directory(self):
        """bibclassify -k ${taxonomy}.rdf directory/

        Points the CLI at the parent directory of test file 94, runs the
        keyword extraction engine and validates the first value returned by
        ``self.unredirect()`` with ``check_pdf2``. When the directory is
        missing, a message goes to stderr and the test returns early
        without any assertion.
        """
        # The URL returned alongside the path is not needed here.
        path, _url = self.get_test_file(94)

        path = os.path.dirname(path)

        if not os.path.exists(path):
            sys.stderr.write("No PDF folder for testing found, returning\n")
            return

        # An explicit argument list keeps the directory path in one piece;
        # the previous "format then .split()" approach would tear apart a
        # path that contains whitespace.
        args = ["-k", "%s.rdf" % self.taxonomy_name, path]
        options = bibclassify_cli._read_options(args)

        self.redirect()
        try:
            bibclassify_engine.output_keywords_for_sources(
                options["text_files"],
                options["taxonomy"],
                rebuild_cache=options["rebuild_cache"],
                no_cache=options["no_cache"],
                output_mode=options["output_mode"],
                output_limit=options["output_limit"],
                spires=options["spires"],
                match_mode=options["match_mode"],
                with_author_keywords=options["with_author_keywords"],
                extract_acronyms=options["extract_acronyms"],
                only_core_tags=options["only_core_tags"],
            )
        finally:
            # unredirect() must run even if the engine raises, so the
            # redirect() above is never left dangling.
            # NOTE(review): assumes redirect()/unredirect() capture and
            # restore stdout/stderr -- verify against the test base class.
            results, _errors = self.unredirect()

        res, msg = check_pdf2(results)
        if not res:
            self.fail(msg)
    def test_full_and_partial_matching_mode(self):
        """bibclassify - difference of extraction on part or full contents of pdf

        Runs the keyword extraction engine twice on test file 94: once with
        the default options and once with ``-m partial``. The partial-mode
        output is validated with ``check_pdf1`` and the default-mode output
        with ``check_pdf2``. When the file is missing, a message goes to
        stderr and the test returns early without any assertion.
        """
        # Only the local path half of the fixture is used by this test.
        path, _url = self.get_test_file(94)

        if not os.path.exists(path):
            sys.stderr.write("No PDF for testing found, returning\n")
            return

        # Build each argv as a list instead of formatting one string and
        # calling .split(): a file path containing whitespace would
        # otherwise be broken into several arguments.
        full_args = ["-k", "%s.rdf" % self.taxonomy_name, path]
        partial_args = full_args + ["-m", "partial"]

        results = []
        for args in (full_args, partial_args):
            options = bibclassify_cli._read_options(args)

            self.redirect()
            try:
                bibclassify_engine.output_keywords_for_sources(
                    options["text_files"],
                    options["taxonomy"],
                    rebuild_cache=options["rebuild_cache"],
                    no_cache=options["no_cache"],
                    output_mode=options["output_mode"],
                    output_limit=options["output_limit"],
                    spires=options["spires"],
                    match_mode=options["match_mode"],
                    with_author_keywords=options["with_author_keywords"],
                    extract_acronyms=options["extract_acronyms"],
                    only_core_tags=options["only_core_tags"],
                )
            finally:
                # Pair every redirect() with an unredirect(), even when the
                # engine raises. NOTE(review): presumably these helpers swap
                # sys.stdout/sys.stderr for capture buffers -- confirm.
                output, _errors = self.unredirect()
            results.append(output)

        # Same order as the runs above: default mode first, then partial.
        full_output, partial_output = results

        res, msg = check_pdf1(partial_output)
        if not res:
            self.fail(msg)
        res, msg = check_pdf2(full_output)
        if not res:
            self.fail(msg)
Exemple #6
0
    def test_full_and_partial_matching_mode(self):
        """bibclassify - difference of extraction on part or full contents of pdf

        Extracts keywords from test file 94 in two configurations (default
        options, then ``-m partial``) and checks the partial-mode output
        with ``check_pdf1`` and the default-mode output with
        ``check_pdf2``. If the file does not exist the test writes a note
        to stderr and returns without asserting anything.
        """
        # The URL returned alongside the path is not needed here.
        path, _url = self.get_test_file(94)

        if not os.path.exists(path):
            sys.stderr.write("No PDF for testing found, returning\n")
            return

        # Explicit argument lists keep the file path in one piece; the
        # previous "format then .split()" approach would tear apart a path
        # that contains whitespace.
        full_args = ["-k", "%s.rdf" % self.taxonomy_name, path]
        partial_args = full_args + ["-m", "partial"]

        results = []
        for args in (full_args, partial_args):
            options = bibclassify_cli._read_options(args)

            self.redirect()
            try:
                bibclassify_engine.output_keywords_for_sources(
                    options["text_files"],
                    options["taxonomy"],
                    rebuild_cache=options["rebuild_cache"],
                    no_cache=options["no_cache"],
                    output_mode=options["output_mode"],
                    output_limit=options["output_limit"],
                    spires=options["spires"],
                    match_mode=options["match_mode"],
                    with_author_keywords=options["with_author_keywords"],
                    extract_acronyms=options["extract_acronyms"],
                    only_core_tags=options["only_core_tags"])
            finally:
                # unredirect() must run even if the engine raises, so the
                # redirect() above is never left dangling.
                # NOTE(review): assumes redirect()/unredirect() capture and
                # restore stdout/stderr -- verify against the base class.
                output, _errors = self.unredirect()
            results.append(output)

        # Same order as the runs above: default mode first, then partial.
        full_output, partial_output = results

        res, msg = check_pdf1(partial_output)
        if not res:
            self.fail(msg)
        res, msg = check_pdf2(full_output)
        if not res:
            self.fail(msg)