from robotoff import settings
from robotoff.products import ProductDataset
from robotoff.utils import dump_jsonl, get_logger

# Module-level logger obtained from robotoff.utils.get_logger(); it is not
# referenced again in the visible part of this script.
logger = get_logger()


def images_dimension_iter():
    """Yield one record per numbered product image that has a "full" size.

    The product dataset is loaded and streamed, restricted to products with
    a non-empty ``code`` text field. For each such product, every entry of
    its ``images`` mapping is inspected (a missing mapping is treated as
    empty).

    Yields:
        A 4-item list ``[width, height, code, image_id]`` where ``width`` and
        ``height`` are the ``w``/``h`` values of the image's ``"full"`` size
        converted to ``int``, ``code`` is ``product["code"]`` and
        ``image_id`` is the image key as a string.
    """
    products = ProductDataset.load().stream().filter_nonempty_text_field("code")

    for item in products:
        for key, meta in item.get("images", {}).items():
            # Keep only entries whose key is purely numeric and that expose
            # a "full" size; everything else is silently ignored.
            if key.isdigit() and "full" in meta["sizes"]:
                full_size = meta["sizes"]["full"]
                # Read both dimensions before converting either of them.
                raw_w = full_size["w"]
                raw_h = full_size["h"]
                yield [int(raw_w), int(raw_h), item["code"], str(key)]


# Hand the image-dimension generator to dump_jsonl, targeting
# <PROJECT_DIR>/images_dimension.jsonl (presumably one JSON document per
# line -- confirm against robotoff.utils.dump_jsonl).
# NOTE: this runs at module top level, i.e. as soon as the file is executed
# or imported; there is no ``if __name__ == "__main__"`` guard.
dump_jsonl(settings.PROJECT_DIR / "images_dimension.jsonl", images_dimension_iter())
# Example #2
# 0
from robotoff.insights.importer import (
    AUTHORIZED_LABELS_STORE,
    import_insights as import_insights_,
)
from robotoff.insights.ocr import (
    extract_insights,
    get_barcode_from_path,
    ocr_iter,
    OCRResult,
)
from robotoff.models import db, ProductInsight
from robotoff.off import get_product
from robotoff.products import get_product_store
from robotoff.utils import get_logger, jsonl_iter

# Module-level logger, requested from robotoff.utils.get_logger under this
# module's name.
logger = get_logger(__name__)


def run_from_ocr_archive(
    input_: Union[str, TextIO],
    insight_type: InsightType,
    output: Optional[str] = None,
    keep_empty: bool = False,
):
    """Generate insights from an OCR archive and select the output stream.

    Args:
        input_: OCR archive source, forwarded unchanged to
            ``generate_from_ocr_archive`` (a string or an open text stream,
            per the annotation).
        insight_type: kind of insight to generate; forwarded unchanged.
        output: path of a file to open in text write mode. When ``None``,
            ``sys.stdout`` is used as the destination instead.
        keep_empty: forwarded unchanged to ``generate_from_ocr_archive``
            (presumably controls whether empty results are kept -- confirm
            against that function).

    NOTE(review): the visible part of this function ends right after the
    output stream is chosen; the code that consumes ``insights`` and writes
    to ``output_f`` is not shown here.
    """
    insights = generate_from_ocr_archive(input_, insight_type, keep_empty)

    if output is not None:
        # NOTE(review): this handle is not closed within the visible lines;
        # confirm that the remainder of the function closes it.
        output_f = open(output, "w")
    else:
        # No path given: fall back to standard output.
        output_f = sys.stdout
# Example #3
# 0
    def generate_ocr_insights(input_: str, insight_type: str, output: str):
        """Run the OCR-archive insight generation entry point.

        The robotoff modules are imported inside the function body rather
        than at module level. ``get_logger()`` is called and its result
        discarded (presumably to set up logging -- confirm against
        ``robotoff.utils``), then the three arguments are passed
        positionally to ``robotoff.cli.insights.run_from_ocr_archive``.

        Args:
            input_: forwarded as the first positional argument.
            insight_type: forwarded as the second positional argument.
            output: forwarded as the third positional argument.
        """
        from robotoff.cli import insights as insights_cli
        from robotoff.utils import get_logger as init_logger

        init_logger()
        insights_cli.run_from_ocr_archive(input_, insight_type, output)