예제 #1
0
def insert_batch(data_path: pathlib.Path, model_name: str,
                 model_version: str) -> int:
    """Import object detection results from a JSONL file into the DB.

    Each item whose ``(model_name, source_image)`` key is not already in the
    seen set, and whose image exists in the DB, gets one ``ImagePrediction``
    row, plus one ``LogoAnnotation`` row per detected object scoring at
    least 0.5.

    :param data_path: path of the JSONL file to read
    :param model_name: name of the model that produced the results
    :param model_version: version of the model that produced the results
    :return: number of image predictions inserted
    """
    timestamp = datetime.datetime.utcnow()
    logger.info("Loading seen set...")
    seen_set = get_seen_set()
    logger.info("Seen set loaded")
    inserted = 0

    for item in tqdm.tqdm(jsonl_iter(data_path)):
        source_image = generate_image_path(barcode=item["barcode"],
                                           image_id=item["image_id"])
        key = (model_name, source_image)

        # Skip images already processed by this model.
        if key in seen_set:
            continue

        image_instance = ImageModel.get_or_none(source_image=source_image)

        if image_instance is None:
            logger.warning("Unknown image in DB: {}".format(source_image))
            continue

        # Only detections scoring above 0.1 are stored in the prediction.
        results = [
            detection for detection in item["result"]
            if detection["score"] > 0.1
        ]
        max_confidence = max(
            (detection["score"] for detection in results), default=None)

        inserted += 1
        image_prediction = ImagePrediction.create(
            type=TYPE,
            image=image_instance,
            timestamp=timestamp,
            model_name=model_name,
            model_version=model_version,
            data={"objects": results},
            max_confidence=max_confidence,
        )
        # The index refers to the position in the stored "objects" list.
        for index, detection in enumerate(results):
            if detection["score"] >= 0.5:
                LogoAnnotation.create(
                    image_prediction=image_prediction,
                    index=index,
                    score=detection["score"],
                    bounding_box=detection["bounding_box"],
                )
        seen_set.add(key)

    return inserted
예제 #2
0
    def on_post(self, req: falcon.Request, resp: falcon.Response):
        """Save a batch of logo annotations and generate insights from them.

        The request media must contain an ``annotations`` list whose entries
        provide ``logo_id``, ``type`` and ``value``. ``server_domain`` is
        optional and defaults to ``settings.OFF_SERVER_DOMAIN``. The response
        media holds the result of ``generate_insights_from_annotated_logos``
        under the "created insights" key.
        """
        server_domain = req.media.get("server_domain", settings.OFF_SERVER_DOMAIN)
        annotations = req.media["annotations"]
        auth = parse_auth(req)
        if auth is None:
            username = None
        else:
            username = auth.get_username()
        now = datetime.datetime.utcnow()
        saved_logos = []

        for entry in annotations:
            logo_id = entry["logo_id"]
            annotation_type = entry["type"]
            # A falsy value (e.g. an empty string) is treated as "no value".
            annotation_value = entry["value"] or None
            logo = LogoAnnotation.get_by_id(logo_id)

            # Value-related fields are only touched when a value is supplied.
            if annotation_value is not None:
                logo.annotation_value = annotation_value
                tag = get_tag(annotation_value)
                logo.annotation_value_tag = tag
                logo.taxonomy_value = match_unprefixed_value(tag, annotation_type)

            logo.annotation_type = annotation_type
            logo.username = username
            logo.completed_at = now
            logo.save()
            saved_logos.append(logo)

        created = generate_insights_from_annotated_logos(saved_logos, server_domain)
        resp.media = {"created insights": created}
예제 #3
0
    def on_post(self, req: falcon.Request, resp: falcon.Response):
        """Re-annotate every logo matching a source annotation with a target one.

        Required request parameters: ``source_value``, ``source_type``,
        ``target_value`` and ``target_type``. Logos whose annotation type is
        ``source_type`` and whose value tag matches the tag of
        ``source_value`` are updated in a single query. The response media
        holds the value returned by executing the update under "updated".
        """
        source_value = req.get_param("source_value", required=True)
        source_type = req.get_param("source_type", required=True)
        target_value = req.get_param("target_value", required=True)
        target_type = req.get_param("target_type", required=True)

        auth = parse_auth(req)
        if auth is None:
            username = None
        else:
            username = auth.get_username()
        now = datetime.datetime.utcnow()

        target_value_tag = get_tag(target_value)
        source_value_tag = get_tag(source_value)
        taxonomy_value = match_unprefixed_value(target_value_tag, target_type)

        new_values = {
            LogoAnnotation.annotation_type: target_type,
            LogoAnnotation.annotation_value: target_value,
            LogoAnnotation.annotation_value_tag: target_value_tag,
            LogoAnnotation.taxonomy_value: taxonomy_value,
            LogoAnnotation.username: username,
            LogoAnnotation.completed_at: now,
        }
        matches_source = (
            LogoAnnotation.annotation_type == source_type,
            LogoAnnotation.annotation_value_tag == source_value_tag,
        )
        updated = LogoAnnotation.update(new_values).where(*matches_source).execute()
        resp.media = {"updated": updated}
예제 #4
0
    def export_logo_annotation(
        output: pathlib.Path,
        server_domain: Optional[str] = None,
        annotated: Optional[bool] = None,
    ):
        """Dump logo annotations to ``output`` through ``dump_jsonl``.

        :param output: destination path handed to ``dump_jsonl``
        :param server_domain: when given, keep only logos whose image has
            this server domain
        :param annotated: when True keep only logos with an annotation
            value, when False only logos without one; None disables the
            filter
        """
        from robotoff.models import db, LogoAnnotation, ImageModel, ImagePrediction
        from robotoff.utils import dump_jsonl

        with db:
            filters = []

            if server_domain is not None:
                filters.append(ImageModel.server_domain == server_domain)

            if annotated is not None:
                filters.append(
                    LogoAnnotation.annotation_value.is_null(not annotated))

            # ImageModel is reached through ImagePrediction.
            query = LogoAnnotation.select().join(ImagePrediction).join(
                ImageModel)
            if filters:
                query = query.where(*filters)

            # Rows are streamed lazily so the generator is consumed by
            # dump_jsonl while the DB context is still open.
            serialized_logos = (logo.to_dict() for logo in query.iterator())
            dump_jsonl(output, serialized_logos)
예제 #5
0
    def search(self, req: falcon.Request, resp: falcon.Response):
        """Search logos with the filters given as request parameters.

        Supported parameters: ``count`` (1 to 2000, default 25), ``type``,
        ``barcode``, ``value``, ``min_confidence``, ``random``,
        ``server_domain`` and ``annotated`` (default False). The response
        media holds the serialized logos under "logos" and, under "count",
        the number of matching rows computed before the limit is applied.
        """
        count: int = req.get_param_as_int(
            "count", min_value=1, max_value=2000, default=25
        )
        type_: Optional[str] = req.get_param("type")
        barcode: Optional[str] = req.get_param("barcode")
        value: Optional[str] = req.get_param("value")
        min_confidence: Optional[float] = req.get_param_as_float("min_confidence")
        random: bool = req.get_param_as_bool("random", default=False)
        server_domain: Optional[str] = req.get_param("server_domain")
        annotated: bool = req.get_param_as_bool("annotated", default=False)

        # The annotation-value filter is always present, so this list is
        # never empty.
        filters = [LogoAnnotation.annotation_value.is_null(not annotated)]
        needs_image_prediction = False
        needs_image_model = False

        if server_domain:
            filters.append(ImageModel.server_domain == server_domain)
            needs_image_model = True

        if min_confidence is not None:
            filters.append(ImagePrediction.max_confidence >= min_confidence)
            needs_image_prediction = True

        if barcode is not None:
            filters.append(ImageModel.barcode == barcode)
            needs_image_model = True

        if type_ is not None:
            filters.append(LogoAnnotation.annotation_type == type_)

        if value is not None:
            filters.append(LogoAnnotation.annotation_value_tag == get_tag(value))

        query = LogoAnnotation.select()

        # ImageModel is joined through ImagePrediction, so needing the image
        # table implies joining the prediction table first.
        if needs_image_prediction or needs_image_model:
            query = query.join(ImagePrediction)
        if needs_image_model:
            query = query.join(ImageModel)

        query = query.where(*filters)
        query_count = query.count()

        if random:
            query = query.order_by(peewee.fn.Random())

        query = query.limit(count)

        items = []
        for logo in query.iterator():
            logo_dict = logo.to_dict()
            # Expose the image at the top level instead of the prediction.
            image_prediction = logo_dict.pop("image_prediction")
            logo_dict["image"] = image_prediction["image"]
            items.append(logo_dict)

        resp.media = {"logos": items, "count": query_count}
예제 #6
0
    def fetch_logos(self, logo_ids: List[str], resp: falcon.Response):
        """Write the logos with the given ids to the response media.

        Each serialized logo has its ``image_prediction`` entry replaced by
        an ``image`` entry taken from that prediction.

        :param logo_ids: ids of the logos to fetch
        :param resp: response whose media is set to the logos and their count
        """
        query = (
            LogoAnnotation.select()
            .join(ImagePrediction)
            .join(ImageModel)
            .where(LogoAnnotation.id.in_(logo_ids))
        )

        logos = []
        for logo in query.iterator():
            serialized = logo.to_dict()
            prediction = serialized.pop("image_prediction")
            serialized["image"] = prediction["image"]
            logos.append(serialized)

        resp.media = {"logos": logos, "count": len(logos)}
예제 #7
0
    def on_get(self, req: falcon.Request, resp: falcon.Response, logo_id: int):
        """Return a single logo, or a 404 status when the id is unknown.

        The serialized logo has its ``image_prediction`` entry replaced by
        an ``image`` entry taken from that prediction.
        """
        logo = LogoAnnotation.get_or_none(id=logo_id)

        if logo is not None:
            payload = logo.to_dict()
            prediction = payload.pop("image_prediction")
            payload["image"] = prediction["image"]
            resp.media = payload
        else:
            resp.status = falcon.HTTP_404
예제 #8
0
def send_logo_notification(logo: LogoAnnotation, probs: Dict[LogoLabelType,
                                                             float]):
    """Post a message describing a logo prediction to the alert channel.

    :param logo: the logo the prediction is about
    :param probs: mapping from label to probability; labels are listed in
        the message by decreasing probability
    """
    crop_url = logo.get_crop_image_url()
    ranked_labels = sorted(
        probs.items(), key=operator.itemgetter(1), reverse=True)
    prob_lines = [
        f"{label[0]} - {label[1]}: {prob:.2g}" for label, prob in ranked_labels
    ]
    prob_text = "\n".join(prob_lines)
    barcode = logo.image_prediction.image.barcode
    text = (
        f"Prediction for <{crop_url}|image> "
        f"(<https://hunger.openfoodfacts.org/logos?logo_id={logo.id}|annotate logo>, "
        f"<https://world.openfoodfacts.org/product/{barcode}|product>):\n{prob_text}"
    )
    post_message(text, settings.SLACK_OFF_ROBOTOFF_ALERT_CHANNEL)
예제 #9
0
def test_crop_image_url(monkeypatch):
    """The crop URL embeds the image URL and the bounding box coordinates."""
    # Remove the environment override so that default settings apply.
    monkeypatch.delenv("ROBOTOFF_SCHEME", raising=False)

    image = ImageModel(
        barcode="123",
        image_id="image_id",
        source_image="/image",
        width=20,
        height=20,
    )
    prediction = ImagePrediction(
        type="label",
        model_name="test-model",
        model_version="1.0",
        image=image,
    )
    logo_annotation = LogoAnnotation(
        image_prediction=prediction,
        bounding_box=(1, 1, 2, 2),
    )

    crop_url = logo_annotation.get_crop_image_url()
    expected_url = (
        f"https://robotoff.{settings._robotoff_domain}/api/v1/images/crop"
        + f"?image_url={settings.OFF_IMAGE_BASE_URL}/image&y_min=1&x_min=1&y_max=2&x_max=2"
    )
    assert crop_url == expected_url
예제 #10
0
 def send_logo_notification(self, logo: LogoAnnotation,
                            probs: Dict[LogoLabelType, float]):
     """Post a message describing a logo prediction to the alert channel.

     :param logo: the logo the prediction is about
     :param probs: mapping from label to probability; labels are listed in
         the message by decreasing probability
     """
     crop_url = logo.get_crop_image_url()
     ranked_labels = sorted(
         probs.items(), key=operator.itemgetter(1), reverse=True)
     prob_lines = [
         f"{label[0]} - {label[1]}: {prob:.2g}"
         for label, prob in ranked_labels
     ]
     prob_text = "\n".join(prob_lines)
     barcode = logo.image_prediction.image.barcode
     base_off_url = settings.BaseURLProvider().get()
     text = (
         f"Prediction for <{crop_url}|image> "
         f"(<https://hunger.openfoodfacts.org/logos?logo_id={logo.id}|annotate logo>, "
         f"<{base_off_url}/product/{barcode}|product>):\n{prob_text}")
     message = _slack_message_block(text)
     self._post_message(message, self.ROBOTOFF_ALERT_CHANNEL)
예제 #11
0
def get_logo_annotations() -> Dict[int, LogoLabelType]:
    """Return the label of every logo that has an annotation type.

    A logo without annotation value maps to ``(annotation_type, None)``; a
    logo with a value is included only when it also has a taxonomy value,
    in which case it maps to ``(annotation_type, taxonomy_value)``.

    :return: mapping from logo id to its label
    """
    # Only the columns needed to build the labels are fetched.
    typed_logos = LogoAnnotation.select(
        LogoAnnotation.id,
        LogoAnnotation.annotation_type,
        LogoAnnotation.annotation_value,
        LogoAnnotation.taxonomy_value,
    ).where(LogoAnnotation.annotation_type.is_null(False))

    annotations: Dict[int, LogoLabelType] = {}
    for logo in typed_logos.iterator():
        if logo.annotation_value is None:
            label = (logo.annotation_type, None)
        elif logo.taxonomy_value is not None:
            label = (logo.annotation_type, logo.taxonomy_value)
        else:
            # A value without taxonomy match: the logo is left out.
            continue
        annotations[logo.id] = label

    return annotations
예제 #12
0
def test_noop_slack_notifier_logging(caplog):
    """The no-op notifier emits exactly one log record for a notification."""
    caplog.set_level(logging.INFO)
    notifier = slack.NoopSlackNotifier()

    image = ImageModel(source_image="/path/to/image.jpg", width=10, height=10)
    prediction = ImagePrediction(barcode="123", image=image)
    logo = LogoAnnotation(image_prediction=prediction, bounding_box=(1, 1, 2, 2))

    notifier.send_logo_notification(logo, {})

    # Unpacking fails unless exactly one record was captured.
    [logged] = caplog.records
    assert logged.msg.startswith("Alerting on slack channel")
예제 #13
0
def run_object_detection(barcode: str, image_url: str, server_domain: str):
    """Run object detection on an image and store the detected logos.

    One ``ImagePrediction`` is created per model returned by
    ``predict_objects``, and one ``LogoAnnotation`` per detected object
    scoring at least 0.5. Nothing is done when the image is not in the DB.

    :param barcode: product barcode, forwarded to ``predict_objects``
    :param image_url: URL of the image to process
    :param server_domain: server domain, forwarded to ``predict_objects``
        and ``import_logo_insights``
    """
    source_image = get_source_from_image_url(image_url)
    image_instance = ImageModel.get_or_none(source_image=source_image)

    if image_instance is None:
        logger.warning("Missing image in DB for image {}".format(image_url))
        return

    timestamp = datetime.datetime.utcnow()
    results = predict_objects(barcode, image_url, server_domain)

    logos = []
    for model_name, result in results.items():
        detections = result.to_json(threshold=0.1)
        max_confidence = max(
            (detection["score"] for detection in detections), default=None)
        image_prediction = ImagePrediction.create(
            image=image_instance,
            type="object_detection",
            model_name=model_name,
            model_version=settings.OBJECT_DETECTION_MODEL_VERSION[model_name],
            data={"objects": detections},
            timestamp=timestamp,
            max_confidence=max_confidence,
        )
        # The index refers to the position in the stored "objects" list.
        for index, detection in enumerate(detections):
            if detection["score"] >= 0.5:
                logos.append(
                    LogoAnnotation.create(
                        image_prediction=image_prediction,
                        index=index,
                        score=detection["score"],
                        bounding_box=detection["bounding_box"],
                    ))

    if not logos:
        return

    add_logos_to_ann(image_instance, logos)
    save_nearest_neighbors(logos)
    thresholds = LOGO_CONFIDENCE_THRESHOLDS.get()
    import_logo_insights(logos,
                         thresholds=thresholds,
                         server_domain=server_domain)
예제 #14
0
    def add_logo_to_ann(sleep_time: float):
        """Add logos without stored nearest neighbors to the ANN, image by image.

        Logos whose ``nearest_neighbors`` field is null are fetched ordered
        by image id and grouped per image. A group is skipped when all its
        logo ids are already returned by ``get_stored_logo_ids``; otherwise
        the whole group is passed to ``add_logos_to_ann``. A read timeout on
        one image is logged and processing moves on to the next image.

        :param sleep_time: number of seconds to sleep after each processed
            image; a falsy value disables the pause
        """
        from itertools import groupby
        import time

        import requests
        import tqdm

        from robotoff.logos import add_logos_to_ann, get_stored_logo_ids
        from robotoff.models import db, ImageModel, ImagePrediction, LogoAnnotation
        from robotoff.utils import get_logger

        logger = get_logger()
        seen = get_stored_logo_ids()

        with db:
            # groupby only merges consecutive items, hence the ordering by
            # image id in the query.
            logos_iter = tqdm.tqdm(LogoAnnotation.select().join(
                ImagePrediction).join(ImageModel).where(
                    LogoAnnotation.nearest_neighbors.is_null()).order_by(
                        ImageModel.id).iterator())
            for _, logo_batch in groupby(
                    logos_iter, lambda x: x.image_prediction.image.id):
                logos = list(logo_batch)

                if all(logo.id in seen for logo in logos):
                    continue

                image = logos[0].image_prediction.image
                logger.info(f"Adding logos of image {image.id}")
                try:
                    added = add_logos_to_ann(image, logos)
                except requests.exceptions.ReadTimeout:
                    # `warn` is a deprecated alias of `warning`.
                    logger.warning("Request timed-out during logo addition")
                    continue

                logger.info(f"Added: {added}")

                if sleep_time:
                    time.sleep(sleep_time)
예제 #15
0
    def on_put(self, req: falcon.Request, resp: falcon.Response, logo_id: int):
        """Update the annotation type and value of a single logo.

        The request media must provide ``type`` and ``value``. The logo is
        saved, together with the username and completion time, only when
        the type or the value differs from what is stored. Responds with
        404 when the logo does not exist, 204 otherwise.
        """
        logo = LogoAnnotation.get_or_none(id=logo_id)

        if logo is None:
            resp.status = falcon.HTTP_404
            return

        new_type = req.media["type"]
        # A falsy value (e.g. an empty string) is treated as "no value".
        new_value = req.media["value"] or None

        type_changed = new_type != logo.annotation_type
        if type_changed:
            logo.annotation_type = new_type

        value_changed = new_value != logo.annotation_value
        if value_changed:
            logo.annotation_value = new_value

            if new_value is None:
                # Clearing the value also clears the fields derived from it.
                logo.annotation_value_tag = None
                logo.taxonomy_value = None
            else:
                tag = get_tag(new_value)
                logo.annotation_value_tag = tag
                logo.taxonomy_value = match_unprefixed_value(tag, new_type)

        if type_changed or value_changed:
            auth = parse_auth(req)
            if auth is None:
                logo.username = None
            else:
                logo.username = auth.get_username()
            logo.completed_at = datetime.datetime.utcnow()
            logo.save()

        resp.status = falcon.HTTP_204
예제 #16
0
def test_image_brand_annotation(client, monkeypatch, fake_taxonomy):
    """Annotating a logo as a brand stores the annotation and produces one
    prediction and one insight for the product.
    """
    ann = LogoAnnotationFactory(
        image_prediction__image__source_image="/images/2.jpg", annotation_type="brand"
    )
    barcode = ann.image_prediction.image.barcode
    _fake_store(monkeypatch, barcode)
    monkeypatch.setattr(
        BRAND_PREFIX_STORE, "get", lambda: {("Etorki", "0000000xxxxxx")}
    )
    payload = {
        "withCredentials": True,
        "annotations": [{"logo_id": ann.id, "value": "etorki", "type": "brand"}],
    }

    # The request is bracketed by two timestamps to bound the stored dates.
    start = datetime.utcnow()
    result = client.simulate_post(
        "/api/v1/images/logos/annotate", json=payload, headers=_AUTH_HEADER
    )
    end = datetime.utcnow()

    assert result.status_code == 200
    assert result.json == {"created insights": 1}

    # The annotation is reloaded from the DB to check what was persisted.
    ann = LogoAnnotation.get(LogoAnnotation.id == ann.id)
    assert ann.annotation_type == "brand"
    assert ann.annotation_value == "etorki"
    assert ann.annotation_value_tag == "etorki"
    assert ann.taxonomy_value == "Etorki"
    assert ann.username == "a"
    assert start <= ann.completed_at <= end

    expected_data = {
        "logo_id": ann.id,
        "confidence": 1.0,
        "username": "******",
        "is_annotation": True,
        "notify": True,
    }

    # A single prediction is generated for the product.
    predictions = list(Prediction.select().filter(barcode=barcode).execute())
    assert len(predictions) == 1
    prediction = predictions[0]
    assert prediction.type == "brand"
    assert prediction.data == expected_data
    assert prediction.value == "Etorki"
    assert prediction.value_tag == "Etorki"
    assert prediction.predictor == "universal-logo-detector"
    assert start <= prediction.timestamp <= end
    assert prediction.automatic_processing

    # The prediction in turn generates a single insight.
    insights = list(ProductInsight.select().filter(barcode=barcode).execute())
    assert len(insights) == 1
    insight = insights[0]
    assert insight.type == "brand"
    assert insight.data == expected_data
    assert insight.value == "Etorki"
    assert insight.value_tag == "Etorki"
    assert insight.predictor == "universal-logo-detector"
    # NOTE(review): this re-checks the prediction's timestamp rather than
    # the insight's; possibly a copy-paste leftover, to be confirmed.
    assert start <= prediction.timestamp <= end
    assert insight.automatic_processing
    assert insight.username == "a"
    # The insight itself has not been annotated yet.
    assert insight.completed_at is None
예제 #17
0
def test_image_label_annotation(client, monkeypatch, fake_taxonomy):
    """Annotating a logo as a label stores the annotation and produces one
    prediction and one insight for the product.
    """
    ann = LogoAnnotationFactory(image_prediction__image__source_image="/images/2.jpg")
    barcode = ann.image_prediction.image.barcode
    _fake_store(monkeypatch, barcode)
    payload = {
        "withCredentials": True,
        "annotations": [
            {"logo_id": ann.id, "value": "EU Organic", "type": "label"}
        ],
    }

    # The request is bracketed by two timestamps to bound the stored dates.
    start = datetime.utcnow()
    result = client.simulate_post(
        "/api/v1/images/logos/annotate", json=payload, headers=_AUTH_HEADER
    )
    end = datetime.utcnow()

    assert result.status_code == 200
    assert result.json == {"created insights": 1}

    # The annotation is reloaded from the DB to check what was persisted.
    ann = LogoAnnotation.get(LogoAnnotation.id == ann.id)
    assert ann.annotation_type == "label"
    assert ann.annotation_value == "EU Organic"
    assert ann.annotation_value_tag == "eu-organic"
    assert ann.taxonomy_value == "en:eu-organic"
    assert ann.username == "a"
    assert start <= ann.completed_at <= end

    expected_data = {
        "logo_id": ann.id,
        "confidence": 1.0,
        "username": "******",
        "is_annotation": True,
        "notify": True,
    }

    # A single prediction is generated for the product.
    predictions = list(Prediction.select().filter(barcode=barcode).execute())
    assert len(predictions) == 1
    prediction = predictions[0]
    assert prediction.type == "label"
    assert prediction.data == expected_data
    assert prediction.value is None
    assert prediction.value_tag == "en:eu-organic"
    assert prediction.predictor == "universal-logo-detector"
    assert start <= prediction.timestamp <= end
    assert prediction.automatic_processing

    # The prediction in turn generates a single insight.
    insights = list(ProductInsight.select().filter(barcode=barcode).execute())
    assert len(insights) == 1
    insight = insights[0]
    assert insight.type == "label"
    assert insight.data == expected_data
    assert insight.value is None
    assert insight.value_tag == "en:eu-organic"
    assert insight.predictor == "universal-logo-detector"
    # NOTE(review): this re-checks the prediction's timestamp rather than
    # the insight's; possibly a copy-paste leftover, to be confirmed.
    assert start <= prediction.timestamp <= end
    assert insight.automatic_processing
    assert insight.username == "a"
    assert insight.completed_at is None
예제 #18
0
def run_object_detection(
    barcode: str, image: Image.Image, source_image: str, server_domain: str
):
    """Detect logos using the universal logo detector model and generate
    logo-related insights.

    Nothing is done when no image with the given ``source_image`` exists in
    the DB. Otherwise one ``ImagePrediction`` is created, plus one
    ``LogoAnnotation`` per detected object scoring at least 0.5. An HTTP
    error while saving nearest neighbors is logged and does not prevent the
    insight import.

    :param barcode: Product barcode
    :param image: Pillow Image to run the object detection on
    :param source_image: Source image identifier used to look the image up
        in the DB
    :param server_domain: The server domain associated with the image
    """
    logger.info(
        "Running object detection for product %s (%s), image %s",
        barcode,
        server_domain,
        source_image,
    )
    image_instance = ImageModel.get_or_none(source_image=source_image)

    if image_instance is None:
        logger.warning("Missing image in DB for image %s", source_image)
        return

    timestamp = datetime.datetime.utcnow()
    model_name = "universal-logo-detector"
    results = ObjectDetectionModelRegistry.get(model_name).detect_from_image(
        image, output_image=False
    )
    data = results.to_json(threshold=0.1)
    max_confidence = max((item["score"] for item in data), default=None)
    image_prediction = ImagePrediction.create(
        image=image_instance,
        type="object_detection",
        model_name=model_name,
        model_version=settings.OBJECT_DETECTION_MODEL_VERSION[model_name],
        data={"objects": data},
        timestamp=timestamp,
        max_confidence=max_confidence,
    )

    logos = []
    # The index refers to the position in the stored "objects" list.
    for i, item in enumerate(data):
        if item["score"] >= 0.5:
            logo = LogoAnnotation.create(
                image_prediction=image_prediction,
                index=i,
                score=item["score"],
                bounding_box=item["bounding_box"],
            )
            logos.append(logo)

    logger.info("%s logos found for image %s", len(logos), source_image)
    if logos:
        add_logos_to_ann(image_instance, logos)

        try:
            save_nearest_neighbors(logos)
        except requests.exceptions.HTTPError as e:
            resp = e.response
            # Plain %-style arguments: response values are never interpreted
            # as part of the format string.
            logger.warning(
                "Could not save nearest neighbors in ANN: %s: %s",
                resp.status_code,
                resp.text,
            )

        thresholds = LOGO_CONFIDENCE_THRESHOLDS.get()
        import_logo_insights(logos, thresholds=thresholds, server_domain=server_domain)
import json

from robotoff import settings
from robotoff.models import LogoAnnotation, db

# Map the id of every logo that has a taxonomy value to that value.
with db:
    labelled_logos = LogoAnnotation.select(
        LogoAnnotation.id, LogoAnnotation.taxonomy_value
    ).where(LogoAnnotation.taxonomy_value.is_null(False))
    annotations = {
        logo.id: logo.taxonomy_value for logo in labelled_logos.iterator()
    }

# NOTE(review): the whole mapping is written as one JSON document although
# the file name ends in ".jsonl" -- confirm what the readers expect.
output_path = settings.DATASET_DIR / "annotations.jsonl"
with output_path.open("w") as output_file:
    json.dump(annotations, output_file)