Beispiel #1
0
def generate_ocr_insights(
    source: str,
    insight_type: str,
    # The default is None (not `...`) so that the option is genuinely
    # optional, which is what its help text promises. The annotation stays
    # `Path` because `Optional` may not be imported at module scope here.
    output: Path = Option(
        None,
        help="File to write output to, stdout if not specified",
        dir_okay=False,
        writable=True,
    ),
    keep_empty: bool = Argument(..., help="Keep documents with empty insight"),
) -> None:
    """Generate OCR insights of the requested type.

    SOURCE can be either:
    * the path to a JSON file, a (gzipped-)JSONL file, or a directory
        containing JSON files
    * a barcode
    * the '-' character: input is read from stdin and assumed to be JSONL

    Output is JSONL, each line containing the insights for one document.
    """
    # Imports are kept local to the command so that they are only paid for
    # when this command actually runs.
    from typing import TextIO, Union

    from robotoff.cli import insights
    from robotoff.insights._enum import InsightType
    from robotoff.utils import get_logger

    # "-" selects the process's standard input; any other value is forwarded
    # untouched and interpreted by run_from_ocr_archive.
    input_: Union[str, TextIO] = sys.stdin if source == "-" else source

    # The return value is deliberately discarded.
    # NOTE(review): presumably called for its logging-setup side effect — confirm.
    get_logger()

    # `insight_type` is looked up by subscript on InsightType, so a value that
    # is not a valid key fails here, before any processing starts.
    # NOTE(review): `output` is None when --output is omitted; this assumes
    # run_from_ocr_archive treats None as "write to stdout", as the option's
    # help text states — confirm against its signature.
    insights.run_from_ocr_archive(
        input_, InsightType[insight_type], output, keep_empty
    )
Beispiel #2
0
    def generate_ocr_insights(
        source: str,
        insight_type: str,
        output: str,
        keep_empty: bool,
    ):
        """Run OCR insight generation by delegating to ``insights.run_from_ocr_archive``.

        Args:
            source: Input designator. The single character ``"-"`` selects
                ``sys.stdin``; any other value is forwarded unchanged.
            insight_type: Key used to subscript ``InsightType``.
            output: Forwarded unchanged as the third positional argument.
            keep_empty: Forwarded unchanged as the fourth positional argument.
        """
        from typing import TextIO, Union
        from robotoff.cli import insights
        from robotoff.insights._enum import InsightType
        from robotoff.utils import get_logger

        # Resolve where the input comes from before anything else happens.
        ocr_source: Union[str, TextIO]
        if source == "-":
            ocr_source = sys.stdin
        else:
            ocr_source = source

        # Return value intentionally unused.
        get_logger()

        run_archive = insights.run_from_ocr_archive
        run_archive(ocr_source, InsightType[insight_type], output, keep_empty)
Beispiel #3
0
 def generate_ocr_insights(input_: str, insight_type: str, output: str):
     """Forward all three arguments, unchanged, to ``insights.run_from_ocr_archive``.

     The ``robotoff.cli.insights`` module is imported on each call rather
     than at module import time.
     """
     from robotoff.cli import insights

     run_archive = insights.run_from_ocr_archive
     run_archive(input_, insight_type, output)
Beispiel #4
0
    def generate_ocr_insights(input_: str, insight_type: str, output: str):
        """Call ``get_logger()``, then delegate to ``insights.run_from_ocr_archive``.

        All three arguments are passed through positionally and unchanged.
        """
        from robotoff.cli import insights
        from robotoff.utils import get_logger

        # Return value intentionally unused; the call happens before the
        # archive runner is looked up and invoked.
        get_logger()

        run_archive = insights.run_from_ocr_archive
        run_archive(input_, insight_type, output)