예제 #1
0
def rundetection(output_directory, pdf_path):
    """Run figure detection on a single PDF.

    Builds a figure extraction pipeline and runs it on the pdf located
    at PDF_PATH, with OUTPUT_DIRECTORY passed as the destination for
    the detection results. Nothing is returned.
    """
    # The pipeline module is imported inside the function (rather than
    # at module level) so that help text can be returned quickly.
    from deepfigures.extraction import pipeline

    pipeline.FigureExtractionPipeline().extract(pdf_path, output_directory)
예제 #2
0
    def test_extract(self):
        """Compare an end-to-end extraction with the stored reference JSON."""
        source_pdf = "/work/tests/data/endtoend/paper.pdf"
        reference_json = \
            '/work/tests/data/endtoend/_work_tests_data_endtoend_paper.pdf-result.json'
        extractor = pipeline.FigureExtractionPipeline()

        # Extract into a throwaway directory; the comparison has to happen
        # while the directory (and the JSON written into it) still exists.
        with tempfile.TemporaryDirectory() as scratch_dir:
            result = extractor.extract(source_pdf, scratch_dir)
            test.test_deepfigures_json(
                self,
                expected_json=reference_json,
                actual_json=result.deepfigures_json_path)
"""Detect the figures in a PDF."""

import logging
import os

import click


# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): the pipeline is imported and a FigureExtractionPipeline is
# constructed right here, i.e. when this module is imported rather than when
# a detection is requested -- confirm this eager setup is intended.
from deepfigures.extraction import pipeline
figure_extractor = pipeline.FigureExtractionPipeline()

# @click.command(
#     context_settings={
#         'help_option_names': ['-h', '--help']
#         })
# @click.argument(
#     'output_directory',
#     type=click.Path(file_okay=False))
# @click.argument(
#     'pdf_path',
#     type=click.Path(exists=True, dir_okay=False))
def rundetection(output_directory, pdf_path):
    """Detect figures from the pdf at PDF_PATH.

    Detect the figures from the pdf located at PDF_PATH and write the
    detection results to the directory specified by OUTPUT_DIRECTORY.
    """
    print(output_directory, pdf_path)
    # import lazily to speed up response time for returning help text