def generate_fiber_quality_facet():
    """Push fiber data-quality facets onto products that mention fiber on an
    image but carry no fiber value in their nutriments.

    Iterates over nutrient-mention predictions whose ``mentions`` data
    contains ``"fiber"`` and that have a source image. For each barcode not
    yet updated in this run, the product is fetched from the product store
    and skipped when it is missing, when the image is not valid, or when
    ``fiber`` / ``fiber_prepared`` is already present in its nutriments.
    Facets that are absent from ``data_quality_tags`` are then pushed to both
    ``data_quality_tags`` and ``data_quality_warnings_tags``.
    """
    store: DBProductStore = get_product_store()
    mongo_collection = store.collection
    # Barcodes updated during this run; its size is the number of updates.
    updated_barcodes: Set[str] = set()

    fiber_mentions = Prediction.select(
        Prediction.barcode, Prediction.source_image
    ).where(
        Prediction.type == PredictionType.nutrient_mention.name,
        Prediction.data["mentions"].contains("fiber"),
        Prediction.source_image.is_null(False),
    )

    for prediction in fiber_mentions.iterator():
        barcode = prediction.barcode
        if barcode in updated_barcodes:
            continue

        product = store.get_product(
            barcode, ["nutriments", "data_quality_tags", "images"]
        )
        if product is None:
            continue

        nutriments = product.get("nutriments", {})
        current_tags = product.get("data_quality_tags", {})
        images = product.get("images", {})

        if not is_valid_image(images, prediction.source_image):
            continue
        if "fiber" in nutriments or "fiber_prepared" in nutriments:
            continue

        facets = []
        if FIBER_QUALITY_FACET_NAME not in current_tags:
            facets.append(FIBER_QUALITY_FACET_NAME)
        # The nutrition-specific facet only applies when the mention comes
        # from a nutrition image.
        if (
            FIBER_NUTRITION_QUALITY_FACET_NAME not in current_tags
            and is_nutrition_image(images, prediction.source_image)
        ):
            facets.append(FIBER_NUTRITION_QUALITY_FACET_NAME)

        if not facets:
            continue

        logger.info("Adding facets to {}: {}".format(barcode, facets))
        updated_barcodes.add(barcode)
        push_spec = {
            field: {"$each": facets}
            for field in ("data_quality_tags", "data_quality_warnings_tags")
        }
        mongo_collection.update_one({"code": barcode}, {"$push": push_spec})

    logger.info(
        "Fiber quality facets added on {} products".format(len(updated_barcodes))
    )
def get_product_predictions(
    barcodes: List[str], prediction_types: Optional[List[str]] = None
) -> Iterator[Dict]:
    """Yield stored predictions for the given barcodes as dicts.

    :param barcodes: Barcodes whose predictions should be returned.
    :param prediction_types: When not ``None``, only predictions whose type
        is in this list are returned.
    :return: An iterator over prediction rows, each as a dict.
    """
    barcode_filter = PredictionModel.barcode.in_(barcodes)
    type_filters = (
        []
        if prediction_types is None
        else [PredictionModel.type.in_(prediction_types)]
    )
    query = PredictionModel.select().where(barcode_filter, *type_filters)
    yield from query.dicts().iterator()
def delete_product_insights(barcode: str, server_domain: str):
    """Delete the predictions and un-annotated insights of a product.

    Insights that already carry an annotation are kept; only rows whose
    ``annotation`` is null are removed.

    :param barcode: Barcode of the deleted product.
    :param server_domain: Server domain the rows must belong to.
    """
    logger.info(f"Product {barcode} deleted, deleting associated insights...")

    prediction_deletion = Prediction.delete().where(
        Prediction.barcode == barcode,
        Prediction.server_domain == server_domain,
    )
    deleted_predictions = prediction_deletion.execute()

    insight_deletion = ProductInsight.delete().where(
        ProductInsight.barcode == barcode,
        ProductInsight.annotation.is_null(),
        ProductInsight.server_domain == server_domain,
    )
    deleted_insights = insight_deletion.execute()

    logger.info(f"{deleted_predictions} predictions deleted, "
                f"{deleted_insights} insights deleted")
def get_image_orientation(barcode: str, image_id: Optional[str]) -> Optional[int]:
    """Return the stored rotation for one image of a product, if any.

    Looks through the image-orientation predictions of ``barcode`` on
    ``settings.OFF_SERVER_DOMAIN`` that have a source image, and returns the
    ``"rotation"`` entry of the first one whose image ID equals ``image_id``.

    :param barcode: Barcode of the product.
    :param image_id: ID of the image to look up. ``None`` is accepted and
        never matches.
    :return: The ``"rotation"`` value of the matching prediction's data, or
        ``None`` when there is no match (or the entry is absent).
    """
    if image_id is None:
        # A missing image ID can never match a stored prediction, so avoid
        # running the query and scanning every row for nothing.
        return None

    orientation_predictions = Prediction.select(
        Prediction.data, Prediction.source_image
    ).where(
        Prediction.barcode == barcode,
        Prediction.type == PredictionType.image_orientation.name,
        Prediction.server_domain == settings.OFF_SERVER_DOMAIN,
        Prediction.source_image.is_null(False),
    )

    for prediction in orientation_predictions.iterator():
        prediction_image_id = get_image_id(
            prediction.source_image)  # type: ignore
        if prediction_image_id == image_id:
            return prediction.data.get("rotation")

    return None
def import_product_predictions(
    barcode: str,
    product_predictions_iter: Iterable[Prediction],
    server_domain: str,
):
    """Import the predictions of a single product, skipping known ones.

    A prediction is skipped when a row with the same (type, server_domain,
    source_image, value_tag, value) already exists in DB for this barcode.

    :param barcode: Barcode of the product. All predictions in
        `product_predictions_iter` must have the same barcode.
    :param product_predictions_iter: Iterable of Predictions.
    :param server_domain: The server domain associated with the predictions.
    :return: The result of `batch_insert` (the number of items imported in
        DB).
    """
    timestamp = datetime.datetime.utcnow()

    stored_keys_query = PredictionModel.select(
        PredictionModel.type,
        PredictionModel.server_domain,
        PredictionModel.source_image,
        PredictionModel.value_tag,
        PredictionModel.value,
    ).where(PredictionModel.barcode == barcode)
    existing_predictions = set(stored_keys_query.tuples())

    def _dedup_key(prediction):
        # Field order must mirror the columns selected above.
        return (
            prediction.type,
            server_domain,
            prediction.source_image,
            prediction.value_tag,
            prediction.value,
        )

    # note: there are some cases when we could decide to replace old
    # predictions of the same key. It's not yet implemented.
    to_import = (
        create_prediction_model(candidate, server_domain, timestamp)
        for candidate in product_predictions_iter
        if _dedup_key(candidate) not in existing_predictions
    )
    return batch_insert(PredictionModel, to_import, 50)
def test_image_brand_annotation(client, monkeypatch, fake_taxonomy):
    """Annotating a logo as a brand stores the annotation and produces exactly
    one prediction and one insight for the product.
    """
    logo = LogoAnnotationFactory(
        image_prediction__image__source_image="/images/2.jpg",
        annotation_type="brand",
    )
    barcode = logo.image_prediction.image.barcode
    _fake_store(monkeypatch, barcode)
    monkeypatch.setattr(
        BRAND_PREFIX_STORE, "get", lambda: {("Etorki", "0000000xxxxxx")}
    )
    payload = {
        "withCredentials": True,
        "annotations": [{"logo_id": logo.id, "value": "etorki", "type": "brand"}],
    }

    start = datetime.utcnow()
    result = client.simulate_post(
        "/api/v1/images/logos/annotate",
        json=payload,
        headers=_AUTH_HEADER,
    )
    end = datetime.utcnow()

    assert result.status_code == 200
    assert result.json == {"created insights": 1}

    # the annotation itself was saved on the logo
    stored = LogoAnnotation.get(LogoAnnotation.id == logo.id)
    assert stored.annotation_type == "brand"
    assert stored.annotation_value == "etorki"
    assert stored.annotation_value_tag == "etorki"
    assert stored.taxonomy_value == "Etorki"
    assert stored.username == "a"
    assert start <= stored.completed_at <= end

    # prediction and insight are expected to carry the same data payload
    expected_data = {
        "logo_id": stored.id,
        "confidence": 1.0,
        "username": "******",
        "is_annotation": True,
        "notify": True,
    }

    # we generate a prediction
    predictions = list(Prediction.select().filter(barcode=barcode).execute())
    assert len(predictions) == 1
    (prediction,) = predictions
    assert prediction.type == "brand"
    assert prediction.data == expected_data
    assert prediction.value == "Etorki"
    assert prediction.value_tag == "Etorki"
    assert prediction.predictor == "universal-logo-detector"
    assert start <= prediction.timestamp <= end
    assert prediction.automatic_processing

    # We check that this prediction in turn generates an insight
    insights = list(ProductInsight.select().filter(barcode=barcode).execute())
    assert len(insights) == 1
    (insight,) = insights
    assert insight.type == "brand"
    assert insight.data == expected_data
    assert insight.value == "Etorki"
    assert insight.value_tag == "Etorki"
    assert insight.predictor == "universal-logo-detector"
    # NOTE(review): this re-checks the prediction's timestamp; possibly
    # insight.timestamp was intended -- confirm before changing.
    assert start <= prediction.timestamp <= end
    assert insight.automatic_processing
    assert insight.username == "a"
    assert insight.completed_at is None  # we did not run annotate yet
def test_image_label_annotation(client, monkeypatch, fake_taxonomy):
    """Annotating a logo as a label stores the annotation and produces exactly
    one prediction and one insight for the product, both tagged with the
    taxonomy value of the label.
    """
    logo = LogoAnnotationFactory(
        image_prediction__image__source_image="/images/2.jpg"
    )
    barcode = logo.image_prediction.image.barcode
    _fake_store(monkeypatch, barcode)
    payload = {
        "withCredentials": True,
        "annotations": [
            {"logo_id": logo.id, "value": "EU Organic", "type": "label"}
        ],
    }

    start = datetime.utcnow()
    result = client.simulate_post(
        "/api/v1/images/logos/annotate",
        json=payload,
        headers=_AUTH_HEADER,
    )
    end = datetime.utcnow()

    assert result.status_code == 200
    assert result.json == {"created insights": 1}

    # the annotation itself was saved on the logo
    stored = LogoAnnotation.get(LogoAnnotation.id == logo.id)
    assert stored.annotation_type == "label"
    assert stored.annotation_value == "EU Organic"
    assert stored.annotation_value_tag == "eu-organic"
    assert stored.taxonomy_value == "en:eu-organic"
    assert stored.username == "a"
    assert start <= stored.completed_at <= end

    # prediction and insight are expected to carry the same data payload
    expected_data = {
        "logo_id": stored.id,
        "confidence": 1.0,
        "username": "******",
        "is_annotation": True,
        "notify": True,
    }

    # we generate a prediction
    predictions = list(Prediction.select().filter(barcode=barcode).execute())
    assert len(predictions) == 1
    (prediction,) = predictions
    assert prediction.type == "label"
    assert prediction.data == expected_data
    assert prediction.value is None
    assert prediction.value_tag == "en:eu-organic"
    assert prediction.predictor == "universal-logo-detector"
    assert start <= prediction.timestamp <= end
    assert prediction.automatic_processing

    # We check that this prediction in turn generates an insight
    insights = list(ProductInsight.select().filter(barcode=barcode).execute())
    assert len(insights) == 1
    (insight,) = insights
    assert insight.type == "label"
    assert insight.data == expected_data
    assert insight.value is None
    assert insight.value_tag == "en:eu-organic"
    assert insight.predictor == "universal-logo-detector"
    # NOTE(review): this re-checks the prediction's timestamp; possibly
    # insight.timestamp was intended -- confirm before changing.
    assert start <= prediction.timestamp <= end
    assert insight.automatic_processing
    assert insight.username == "a"
    assert insight.completed_at is None
def generate_nutrition_image_insights():
    """Regenerate the un-annotated nutrition-image insights.

    First deletes every nutrition-image insight on
    ``settings.OFF_SERVER_DOMAIN`` that has no annotation. Then walks the
    nutrient-mention predictions (ordered by source image, descending) and,
    for each one:

    * derives candidate languages from its ``mentions`` data;
    * looks up the stored rotation of its image and skips the prediction
      when none is found;
    * skips products missing from the product store or that already have a
      nutrition image;
    * creates one insight per candidate language for which no
      nutrition-image prediction exists yet for that barcode.

    Once an insight has been created for a barcode, later predictions of the
    same barcode are skipped so that a product is not given duplicate
    insights from several of its images.
    """
    logger.info("Starting nutrition image insight generation")
    logger.info("Deleting previous nutrition image insights...")
    deleted = (
        ProductInsight.delete()
        .where(
            ProductInsight.annotation.is_null(),
            ProductInsight.type == InsightType.nutrition_image.name,
            ProductInsight.server_domain == settings.OFF_SERVER_DOMAIN,
        )
        .execute()
    )
    logger.info("{} insights deleted".format(deleted))

    product_store: DBProductStore = get_product_store()
    added = 0
    # Barcodes that already received insights during this run.
    seen_set: Set[str] = set()

    prediction: Prediction
    for prediction in (
        Prediction.select()
        .where(Prediction.type == PredictionType.nutrient_mention.name)
        .order_by(Prediction.source_image.desc())
        .iterator()
    ):
        barcode = prediction.barcode
        if barcode in seen_set:
            continue

        mentions = prediction.data["mentions"]
        nutrition_image_langs = find_nutrition_image_lang(mentions)
        if not nutrition_image_langs:
            continue

        image_id = get_image_id(prediction.source_image)
        rotation = get_image_orientation(barcode, image_id)
        if rotation is None:
            continue

        product = product_store.get_product(barcode, ["images"])
        if product is None:
            continue

        images = product.get("images", {})
        if has_nutrition_image(images):
            continue

        insight_created = False
        for lang in nutrition_image_langs:
            already_predicted = (
                Prediction.select()
                .where(
                    Prediction.type == PredictionType.nutrition_image.name,
                    Prediction.barcode == barcode,
                    Prediction.value_tag == lang,
                    Prediction.server_domain == settings.OFF_SERVER_DOMAIN,
                )
                .count()
            )
            if already_predicted:
                continue

            ProductInsight.create(
                id=str(uuid.uuid4()),
                barcode=prediction.barcode,
                type=InsightType.nutrition_image.name,
                value_tag=lang,
                timestamp=datetime.datetime.utcnow(),
                source_image=prediction.source_image,
                server_domain=prediction.server_domain,
                server_type=get_server_type(prediction.server_domain).name,
                automatic_processing=False,
                data={
                    "from_prediction": str(prediction.id),
                    "languages": nutrition_image_langs,
                    # A rotation of 0 is stored as None.
                    "rotation": rotation or None,
                },
            )
            added += 1
            insight_created = True

        if insight_created:
            # Previously seen_set was checked but never filled, so the guard
            # at the top of the loop could never trigger.
            seen_set.add(barcode)

    logger.info("Added: {}".format(added))