Esempio n. 1
0
def updated_product_add_category_insight(barcode: str, product: JSONType,
                                         server_domain: str) -> bool:
    """Predict categories for a product and import them as insights.

    Nothing is done unless ``server_domain`` resolves to ``ServerType.off``.

    :param barcode: product barcode, only used in the log message
    :param product: product data handed to both category predictors
    :param server_domain: server domain the product belongs to
    :return: True if the importer reported a truthy import result,
        False otherwise (including when no insight was predicted)
    """
    if get_server_type(server_domain) != ServerType.off:
        return False

    # The `_es` predictor returns a single insight or None, the `_ml`
    # predictor returns a collection that is appended after it.
    es_insight = predict_category_from_product_es(product)
    candidates = [] if es_insight is None else [es_insight]
    candidates += predict_category_from_product_ml(product,
                                                   filter_blacklisted=True)

    if not candidates:
        return False

    store = get_product_store()
    category_importer = InsightImporterFactory.create(
        InsightType.category.name, store)
    import_result = category_importer.import_insights(
        candidates, server_domain=server_domain, automatic=False)

    if import_result:
        logger.info("Category insight imported for product {}".format(barcode))

    return bool(import_result)
Esempio n. 2
0
    def add_fields(
        self,
        insights: Iterator[Insight],
        timestamp: datetime.datetime,
        server_domain: str,
    ) -> Iterator[Insight]:
        """Fill in the mandatory fields of every insight and yield it back.

        Each insight is mutated in place: it receives a fresh UUID4 string as
        ``id``, the given ``timestamp`` and ``server_domain``, the matching
        server type name, the reserved-barcode flag, and the
        ``countries_tags`` / ``brands_tags`` of the product found in
        ``self.product_store`` (empty list when the attribute is missing).
        Insights with ``automatic_processing`` set and ``latent`` unset also
        get ``process_after`` set to ``timestamp`` plus 10 minutes.
        """
        server_type: str = get_server_type(server_domain).name
        processing_delay = datetime.timedelta(minutes=10)

        for item in insights:
            code = item.barcode
            product = self.product_store[code]

            item.reserved_barcode = is_reserved_barcode(code)
            item.server_domain, item.server_type = server_domain, server_type
            item.id = str(uuid.uuid4())
            item.timestamp = timestamp
            item.countries = getattr(product, "countries_tags", [])
            item.brands = getattr(product, "brands_tags", [])

            if item.automatic_processing and not item.latent:
                item.process_after = timestamp + processing_delay

            yield item
Esempio n. 3
0
def save_image(
    barcode: str, image_url: str, product: Optional[Product], server_domain: str
) -> Optional[ImageModel]:
    """Save imported image details in DB.

    The image is only stored when all of the following hold; otherwise a
    warning is logged and None is returned:

    - ``product`` is not None,
    - the stem of the source image path is made of digits only,
    - that stem is a key of ``product.images``,
    - the image entry has a non-empty ``sizes["full"]`` value,
    - the image entry has an ``uploaded_t`` field which, when it is a
      string, contains digits only.

    :param barcode: product barcode
    :param image_url: URL of the image, converted to a source image path
    :param product: the product the image belongs to, or None
    :param server_domain: server domain the product belongs to
    :return: the created ``ImageModel``, or None if a check failed
    """
    if product is None:
        message = "Product {} does not exist during image import ({})".format(
            barcode, image_url
        )
        logger.warning(message)
        return None

    source_image = get_source_from_image_url(image_url)
    image_id = pathlib.Path(source_image).stem

    if not image_id.isdigit():
        logger.warning("Non raw image was sent: {}".format(image_url))
        return None

    if image_id not in product.images:
        logger.warning("Unknown image for product {}: {}".format(barcode, image_url))
        return None

    image_meta = product.images[image_id]
    full_size = image_meta.get("sizes", {}).get("full")

    if not full_size:
        logger.warning("Image with missing size information: {}".format(image_meta))
        return None

    width, height = full_size["w"], full_size["h"]

    if "uploaded_t" not in image_meta:
        logger.warning("Missing uploaded_t field: {}".format(image_meta.keys()))
        return None

    upload_ts = image_meta["uploaded_t"]
    if isinstance(upload_ts, str):
        # Timestamps may come as strings; only plain digit strings are accepted.
        if not upload_ts.isdigit():
            logger.warning("Non digit uploaded_t value: {}".format(upload_ts))
            return None

        upload_ts = int(upload_ts)

    return ImageModel.create(
        barcode=barcode,
        image_id=image_id,
        width=width,
        height=height,
        source_image=source_image,
        uploaded_at=datetime.datetime.utcfromtimestamp(upload_ts),
        server_domain=server_domain,
        server_type=get_server_type(server_domain).name,
    )
Esempio n. 4
0
def save_image(
    barcode: str, source_image: str, product: Product, server_domain: str
) -> Optional[ImageModel]:
    """Save imported image details in DB.

    A warning is logged and None is returned when the stem of
    ``source_image`` is not made of digits, when it is not a key of
    ``product.images``, when the image entry has no ``sizes["full"]`` value,
    or when its ``uploaded_t`` field is missing or is a non-digit string.

    :param barcode: product barcode
    :param source_image: path of the image; its stem is used as image ID
    :param product: the product the image belongs to
    :param server_domain: server domain the product belongs to
    :return: the value returned by ``ImageModel.create``, or None if a
        check failed
    """
    image_id = pathlib.Path(source_image).stem

    if not image_id.isdigit():
        logger.warning("Non raw image was sent: %s", source_image)
        return None

    if image_id not in product.images:
        logger.warning("Unknown image for product %s: %s", barcode, source_image)
        return None

    image_meta = product.images[image_id]
    full_size = image_meta.get("sizes", {}).get("full")

    if not full_size:
        logger.warning("Image with missing size information: %s", image_meta)
        return None

    width, height = full_size["w"], full_size["h"]

    if "uploaded_t" not in image_meta:
        logger.warning("Missing uploaded_t field: %s", list(image_meta))
        return None

    upload_ts = image_meta["uploaded_t"]
    if isinstance(upload_ts, str):
        # Timestamps may come as strings; only plain digit strings are accepted.
        if not upload_ts.isdigit():
            logger.warning("Non digit uploaded_t value: %s", upload_ts)
            return None

        upload_ts = int(upload_ts)

    created = ImageModel.create(
        barcode=barcode,
        image_id=image_id,
        width=width,
        height=height,
        source_image=source_image,
        uploaded_at=datetime.datetime.utcfromtimestamp(upload_ts),
        server_domain=server_domain,
        server_type=get_server_type(server_domain).name,
    )
    if created is None:
        return created

    logger.info("New image %s created in DB", created.id)
    return created
Esempio n. 5
0
def add_category_insight(barcode: str, product: JSONType,
                         server_domain: str) -> bool:
    """Predict categories for product and import predicted category insight.

    Nothing is done unless ``server_domain`` resolves to ``ServerType.off``.
    An HTTP error raised by the category classifier is logged and treated as
    "no neural prediction"; the Elasticsearch prediction, if any, is still
    imported.

    :param barcode: product barcode
    :param product: product as retrieved from application
    :param server_domain: the server the product belongs to
    :return: True if at least one category insight was imported
    """
    if get_server_type(server_domain) != ServerType.off:
        return False

    logger.info("Predicting product categories...")
    # predict category using Elasticsearch on title
    product_predictions = []
    es_prediction = predict_category_from_product_es(product)

    if es_prediction is not None:
        product_predictions.append(es_prediction)

    # predict category using neural model
    neural_predictions = []
    try:
        neural_predictions = CategoryClassifier(
            get_taxonomy(TaxonomyType.category.name)).predict(product)
    except requests.exceptions.HTTPError as e:
        resp = e.response
        if resp is None:
            # requests allows an HTTPError without an attached response;
            # log the exception itself instead of failing on attribute access.
            logger.error("Category classifier returned an error: %s", e)
        else:
            # Pass values as logging arguments rather than interpolating them
            # into the format string, so a '%' in a value cannot break it.
            logger.error("Category classifier returned an error: %s: %s",
                         resp.status_code, resp.text)

    for neural_prediction in neural_predictions:
        # The barcode is set here on each neural prediction before import.
        neural_prediction.barcode = barcode
        product_predictions.append(neural_prediction)

    if not product_predictions:
        return False

    imported = import_insights(product_predictions,
                               server_domain,
                               automatic=True)
    logger.info("%s category insight imported for product %s", imported,
                barcode)

    return bool(imported)
Esempio n. 6
0
    def on_post(self, req: falcon.Request, resp: falcon.Response):
        """Insert the image predictions sent in the request body.

        ``req.media["predictions"]`` is read as a list of prediction dicts.
        For each one, ``image_id`` is popped (the dict is mutated) and used
        with ``barcode`` to build ``source_image``; ``server_domain`` falls
        back to ``settings.OFF_SERVER_DOMAIN`` when absent. The remaining
        prediction keys are merged on top of the generated fields, and all
        rows are inserted in one batch into ``ImagePrediction``. ``resp`` is
        not modified.
        """
        timestamp = datetime.datetime.utcnow()

        def build_row(prediction):
            # Generated fields come first; keys of the prediction itself take
            # precedence when both define the same key.
            server_domain: str = prediction.get("server_domain",
                                                settings.OFF_SERVER_DOMAIN)
            row = {
                "timestamp": timestamp,
                "server_domain": server_domain,
                "server_type": get_server_type(server_domain).name,
                "source_image": generate_image_path(
                    prediction["barcode"], prediction.pop("image_id")),
            }
            row.update(prediction)
            return row

        inserts = [build_row(item) for item in req.media["predictions"]]

        inserted = batch_insert(ImagePrediction, inserts)
        logger.info("{} image predictions inserted".format(inserted))
Esempio n. 7
0
    def add_fields(
        self,
        insights: Iterable[JSONType],
        timestamp: datetime.datetime,
        server_domain: str,
    ) -> Iterable[JSONType]:
        """Fill in the mandatory fields of every insight and yield it back.

        Each insight dict is mutated in place: it receives a fresh UUID4
        string as ``id``, the given ``timestamp`` and ``server_domain``, the
        matching server type name, the reserved-barcode flag, the insight
        type returned by ``self.get_type()``, and the ``countries_tags`` /
        ``brands_tags`` of the product found in ``self.product_store``
        (empty list when the attribute is missing).
        """
        server_type: str = get_server_type(server_domain).name

        for item in insights:
            code = item["barcode"]
            product = self.product_store[code]
            # Values are evaluated in the listed order, one entry per field.
            item.update({
                "reserved_barcode": is_reserved_barcode(code),
                "server_domain": server_domain,
                "server_type": server_type,
                "id": str(uuid.uuid4()),
                "timestamp": timestamp,
                "type": self.get_type(),
                "countries": getattr(product, "countries_tags", []),
                "brands": getattr(product, "brands_tags", []),
            })
            yield item
Esempio n. 8
0
def generate_nutrition_image_insights():
    """Regenerate nutrition image insights from nutrient mention predictions.

    Non-annotated nutrition image insights of ``settings.OFF_SERVER_DOMAIN``
    are deleted first. Nutrient mention predictions are then iterated by
    descending ``source_image``; for each barcode, the first prediction that
    yields at least one language and a known image orientation is used and
    the remaining predictions of that barcode are skipped. When the product
    exists and has no nutrition image yet, one insight is created per
    detected language for which no nutrition image prediction exists.
    """
    logger.info("Starting nutrition image insight generation")
    logger.info("Deleting previous nutrition image insights...")
    deleted = (ProductInsight.delete().where(
        ProductInsight.annotation.is_null(),
        ProductInsight.type == InsightType.nutrition_image.name,
        ProductInsight.server_domain == settings.OFF_SERVER_DOMAIN,
    ).execute())
    logger.info("%s insights deleted", deleted)
    product_store: DBProductStore = get_product_store()
    added = 0
    # Barcodes for which a usable prediction has already been handled.
    seen_set: Set[str] = set()

    prediction: Prediction
    for prediction in (Prediction.select().where(
            Prediction.type == PredictionType.nutrient_mention.name).order_by(
                Prediction.source_image.desc()).iterator()):
        barcode = prediction.barcode

        if barcode in seen_set:
            continue

        mentions = prediction.data["mentions"]
        nutrition_image_langs = find_nutrition_image_lang(mentions)

        if not nutrition_image_langs:
            continue

        image_id = get_image_id(prediction.source_image)
        rotation = get_image_orientation(barcode, image_id)

        if rotation is None:
            continue

        # Record the barcode now that a usable prediction was found. The
        # existence check below looks at predictions, not at the insights
        # created here, so without this every further nutrient mention
        # prediction of the same product would create duplicate insights.
        seen_set.add(barcode)

        product = product_store.get_product(barcode, ["images"])

        if product is None:
            continue

        images = product.get("images", {})

        if has_nutrition_image(images):
            continue

        for lang in nutrition_image_langs:
            already_predicted = (Prediction.select().where(
                Prediction.type == PredictionType.nutrition_image.name,
                Prediction.barcode == barcode,
                Prediction.value_tag == lang,
                Prediction.server_domain == settings.OFF_SERVER_DOMAIN,
            ).count())

            if already_predicted:
                continue

            ProductInsight.create(
                id=str(uuid.uuid4()),
                barcode=prediction.barcode,
                type=InsightType.nutrition_image.name,
                value_tag=lang,
                timestamp=datetime.datetime.utcnow(),
                source_image=prediction.source_image,
                server_domain=prediction.server_domain,
                server_type=get_server_type(prediction.server_domain).name,
                automatic_processing=False,
                data={
                    "from_prediction": str(prediction.id),
                    "languages": nutrition_image_langs,
                    # A falsy rotation (e.g. 0) is stored as None.
                    "rotation": rotation or None,
                },
            )
            added += 1

    logger.info("Added: %s", added)
Esempio n. 9
0
    def generate_insights(
        cls,
        predictions: List[Prediction],
        server_domain: str,
        automatic: bool,
        product_store: DBProductStore,
    ) -> Iterator[Tuple[List[ProductInsight], List[ProductInsight]]]:
        """Yield, per barcode, the insights to create and the ones to delete.

        Predictions are grouped by barcode. When the product is missing from
        ``product_store``, the existing insights (if any) are yielded for
        deletion and nothing is created. Otherwise candidates come from
        `generate_candidates` (specific to each insight type, implemented in
        sub-classes), are filtered on image validity and compared with the
        existing insights through `get_insight_update`.

        When ``automatic`` is False, every insight to create has
        ``automatic_processing`` forced to False.
        """
        timestamp = datetime.datetime.utcnow()
        server_type = get_server_type(server_domain).name
        by_barcode = operator.attrgetter("barcode")
        ordered_predictions = sorted(predictions, key=by_barcode)

        for barcode, group in itertools.groupby(ordered_predictions, by_barcode):
            product = product_store[barcode]
            references = get_existing_insight(cls.get_type(), barcode,
                                              server_domain)

            if product is None:
                logger.info(
                    f"Product {barcode} not found in DB, deleting existing insights"
                )
                if references:
                    yield [], references
                continue

            product_predictions = sort_predictions(group)

            # Keep only the candidates whose source image is valid.
            candidates = []
            for generated in cls.generate_candidates(product,
                                                     product_predictions):
                if is_valid_insight_image(product.images,
                                          generated.source_image):
                    candidates.append(generated)

            for candidate in candidates:
                if candidate.automatic_processing is None:
                    logger.warning(
                        "Insight with automatic_processing=None: %s",
                        candidate.__data__)

                trustworthy = is_trustworthy_insight_image(
                    product.images, candidate.source_image)
                if not trustworthy:
                    # Don't process automatically if the insight image is not
                    # trustworthy (too old and not selected)
                    candidate.automatic_processing = False

                if not candidate.data.get("is_annotation"):
                    continue

                # Note: we could add vote annotation for anonymous user,
                # but it should be done outside this loop. It's not yet
                # implemented
                username = candidate.data.get("username")
                if username:
                    # logo annotation by a user
                    candidate.username = username

            to_create, to_delete = cls.get_insight_update(
                candidates, references)

            for new_insight in to_create:
                if not automatic:
                    new_insight.automatic_processing = False
                cls.add_fields(new_insight, product, timestamp, server_domain,
                               server_type)

            yield to_create, to_delete