def update_product(
    self, insight: ProductInsight, auth: Optional[OFFAuthentication] = None
) -> AnnotationResult:
    """Append the insight's value to the product's ``emb_codes`` field.

    Args:
        insight: insight whose ``value`` is the code to add.
        auth: optional credentials forwarded to ``update_emb_codes``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when ``self.already_exists`` reports the
        code as present, ``UPDATED_ANNOTATION_RESULT`` after the update call.
    """
    new_code: str = insight.value

    product = get_product(insight.barcode, ["emb_codes"])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # The field is a comma-separated string; an empty/missing value yields
    # an empty list rather than [""].
    raw_codes: str = product.get("emb_codes", "")
    known_codes: List[str] = raw_codes.split(",") if raw_codes else []

    if self.already_exists(new_code, known_codes):
        return ALREADY_ANNOTATED_RESULT

    known_codes.append(new_code)
    update_emb_codes(
        insight.barcode,
        known_codes,
        server_domain=insight.server_domain,
        insight_id=insight.id,
        auth=auth,
    )
    return UPDATED_ANNOTATION_RESULT
def updated_product_update_insights(barcode: str):
    """Refresh the insights of a product after it has been updated.

    Calls ``updated_product_add_category_insight`` for the product, then runs
    ``delete_invalid_insight`` on every non-annotated insight stored for the
    barcode.

    Args:
        barcode: barcode of the updated product.

    Returns:
        None. When ``get_product`` returns ``None`` a warning is logged and
        nothing else is done.
    """
    product_dict = get_product(barcode)

    if product_dict is None:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("Updated product does not exist: {}".format(barcode))
        return

    if updated_product_add_category_insight(barcode, product_dict):
        logger.info("Product {} updated".format(barcode))

    product = Product(product_dict)
    # One validator per insight type, created on first use and then reused.
    validators: Dict[str, InsightValidator] = {}

    pending_insights = ProductInsight.select().where(
        ProductInsight.annotation.is_null(), ProductInsight.barcode == barcode
    )

    for insight in pending_insights.iterator():
        if insight.type not in validators:
            validators[insight.type] = InsightValidatorFactory.create(
                insight.type, None
            )

        insight_deleted = delete_invalid_insight(
            insight, validator=validators[insight.type], product=product
        )

        if insight_deleted:
            logger.info(
                "Insight {} deleted (type: {})".format(insight.id, insight.type)
            )
def update_product(
    self, insight: ProductInsight, session_cookie: Optional[str] = None
) -> AnnotationResult:
    """Append ``insight.data['text']`` to the product's ``emb_codes`` field.

    Args:
        insight: insight carrying the code under ``data['text']``.
        session_cookie: optional cookie forwarded to ``update_emb_codes``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when ``self.already_exists`` reports the
        code as present, ``UPDATED_ANNOTATION_RESULT`` after the update call.
    """
    new_code: str = insight.data['text']

    product = get_product(insight.barcode, ['emb_codes'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # Comma-separated string; empty/missing value means no codes yet.
    raw_codes: str = product.get('emb_codes', '')
    known_codes: List[str] = raw_codes.split(',') if raw_codes else []

    if self.already_exists(new_code, known_codes):
        return ALREADY_ANNOTATED_RESULT

    known_codes.append(new_code)
    update_emb_codes(
        insight.barcode,
        known_codes,
        insight_id=insight.id,
        session_cookie=session_cookie,
    )
    return UPDATED_ANNOTATION_RESULT
def is_automatically_processable(
    insight: ProductInsight, max_timedelta: datetime.timedelta
) -> bool:
    """Tell whether an insight may be applied without human validation.

    Args:
        insight: insight to check; ``source_image`` and ``barcode`` are read.
        max_timedelta: forwarded to ``is_recent_image``.

    Returns:
        ``False`` when the insight has no source image or when the image file
        stem is not made of digits; otherwise ``True`` if either
        ``is_recent_image`` or ``is_selected_image`` accepts the image.

    Raises:
        InvalidInsight: the product is missing, has no ``images`` entry, or
            the image ID is not among the product images.
    """
    source_image = insight.source_image
    if not source_image:
        return False

    # The image ID is the file name without its extension.
    image_id = pathlib.Path(source_image).stem
    if not image_id.isdigit():
        return False

    barcode = insight.barcode
    product = get_product(barcode, fields=["images"])

    if product is None:
        logger.info("Missing product: {}".format(barcode))
        raise InvalidInsight()

    if "images" not in product:
        logger.info("No images for product {}".format(barcode))
        raise InvalidInsight()

    product_images = product["images"]
    if image_id not in product_images:
        logger.info(
            "Missing image for product {}, ID: {}".format(barcode, image_id)
        )
        raise InvalidInsight()

    # The second check is only evaluated when the first one fails.
    return bool(
        is_recent_image(product_images, image_id, max_timedelta)
        or is_selected_image(product_images, image_id)
    )
def updated_product_update_insights(barcode: str):
    """Run the category-insight update for a product that has changed.

    Args:
        barcode: barcode of the updated product.

    Returns:
        None. When ``get_product`` returns ``None`` a warning is logged and
        the function stops instead of passing ``None`` on to
        ``updated_product_add_category_insight``.
    """
    product = get_product(barcode)

    if product is None:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("Updated product does not exist: {}".format(barcode))
        # Nothing to update for a missing product.
        return

    if updated_product_add_category_insight(barcode, product):
        logger.info("Product {} updated".format(barcode))
def predict_from_barcode(
    self, barcode: str, deepest_only: bool = False
) -> Optional[List[CategoryPrediction]]:
    """Fetch a product by barcode and delegate to ``predict_from_product``.

    Args:
        barcode: barcode of the product to look up.
        deepest_only: forwarded unchanged to ``predict_from_product``.

    Returns:
        The result of ``self.predict_from_product``, or ``None`` (after an
        info log) when ``get_product`` returns ``None``.
    """
    wanted_fields = ["product_name", "ingredients_tags"]
    product = get_product(barcode, fields=wanted_fields)

    if product is not None:
        return self.predict_from_product(product, deepest_only=deepest_only)

    logger.info("Product {} not found".format(barcode))
    return None
def on_get(self, req: falcon.Request, resp: falcon.Response):
    """Respond with category predictions for the requested barcode.

    Reads the required ``barcode`` parameter and the optional boolean
    ``deepest_only`` parameter (default ``False``). The response media is
    ``{"categories": [...]}``; the list is empty when ``get_product`` returns
    a falsy value.
    """
    barcode = req.get_param("barcode", required=True)
    deepest_only = req.get_param_as_bool("deepest_only", default=False)

    product = get_product(barcode)
    if not product:
        categories = []
    else:
        classifier = CategoryClassifier(get_taxonomy(TaxonomyType.category.name))
        categories = [
            prediction.to_dict()
            for prediction in classifier.predict(product, deepest_only)
        ]

    resp.media = {"categories": categories}
def update_product(self, insight: ProductInsight) -> AnnotationResult:
    """Add the insight's ``value_tag`` to the product's labels.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the tag is already in
        ``labels_tags``, ``UPDATED_ANNOTATION_RESULT`` after ``add_label_tag``.
    """
    product = get_product(insight.barcode, ['labels_tags'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # A missing or empty field is treated as "no labels".
    existing_labels: List[str] = product.get('labels_tags') or []
    if insight.value_tag not in existing_labels:
        add_label_tag(insight.barcode, insight.value_tag)
        return UPDATED_ANNOTATION_RESULT

    return ALREADY_ANNOTATED_RESULT
def update_product(
    self, insight: ProductInsight, session_cookie: Optional[str] = None
) -> AnnotationResult:
    """Apply a spellcheck correction to the product's ingredient text.

    The correction is only applied when the ``ingredients_text`` returned by
    ``get_product`` still equals the text stored in ``ProductIngredient``.

    Args:
        insight: insight whose ``data`` holds ``start_offset``,
            ``end_offset`` and ``correction``.
        session_cookie: optional cookie forwarded to ``save_ingredients``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when the product cannot be found, an error
        ``AnnotationResult`` when no ``ProductIngredient`` row exists or the
        ingredient text has changed, ``UPDATED_ANNOTATION_RESULT`` otherwise.
    """
    barcode = insight.barcode

    if not product_exists(barcode):
        return MISSING_PRODUCT_RESULT

    stored_query = ProductIngredient.select().where(
        ProductIngredient.barcode == barcode
    )
    try:
        product_ingredient: ProductIngredient = stored_query.get()
    except ProductIngredient.DoesNotExist:
        logger.warning(
            "Missing product ingredient for product {}".format(barcode)
        )
        return AnnotationResult(
            status="error_no_matching_ingredient",
            description="no ingredient is associated with insight (internal error)",
        )

    stored_text = product_ingredient.ingredients

    product = get_product(barcode, fields=["ingredients_text"])
    if product is None:
        logger.warning("Missing product: {}".format(barcode))
        return MISSING_PRODUCT_RESULT

    # Offsets in the insight refer to the stored text; refuse to apply them
    # to a different text.
    if product.get("ingredients_text") != stored_text:
        logger.warning(
            "ingredients have changed since spellcheck insight "
            "creation (product {})".format(barcode)
        )
        return AnnotationResult(
            status=AnnotationStatus.error_updated_product.name,
            description="the ingredient list has been updated since spellcheck",
        )

    payload = insight.data
    corrected_text = self.generate_full_correction(
        stored_text,
        payload['start_offset'],
        payload['end_offset'],
        payload['correction'],
    )

    save_ingredients(
        barcode,
        corrected_text,
        insight_id=insight.id,
        session_cookie=session_cookie,
    )
    self.update_related_insights(insight)

    # Keep the local copy in sync with what was just saved.
    product_ingredient.ingredients = corrected_text
    product_ingredient.save()
    return UPDATED_ANNOTATION_RESULT
def update_product(self, insight: ProductInsight) -> AnnotationResult:
    """Set the product quantity from ``insight.data['text']`` if it is unset.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a non-empty
        ``quantity``, ``UPDATED_ANNOTATION_RESULT`` after ``update_quantity``.
    """
    product = get_product(insight.barcode, ['quantity'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # Any truthy value counts as an existing quantity.
    if product.get('quantity'):
        return ALREADY_ANNOTATED_RESULT

    update_quantity(insight.barcode, insight.data['text'])
    return UPDATED_ANNOTATION_RESULT
def update_product(self, insight: ProductInsight) -> AnnotationResult:
    """Set the expiration date from ``insight.data['text']`` if it is unset.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a non-empty
        ``expiration_date``, ``UPDATED_ANNOTATION_RESULT`` after
        ``update_expiration_date``.
    """
    new_date: str = insight.data['text']

    product = get_product(insight.barcode, ['expiration_date'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # Any truthy value counts as an existing expiration date.
    if product.get('expiration_date'):
        return ALREADY_ANNOTATED_RESULT

    update_expiration_date(insight.barcode, new_date)
    return UPDATED_ANNOTATION_RESULT
def update_product(self, insight: ProductInsight) -> AnnotationResult:
    """Add the insight's ``value_tag`` to the product's categories.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the tag is already in
        ``categories_tags``, ``UPDATED_ANNOTATION_RESULT`` after
        ``add_category``.
    """
    product = get_product(insight.barcode, ['categories_tags'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # A missing or empty field is treated as "no categories".
    existing_categories: List[str] = product.get('categories_tags') or []
    if insight.value_tag in existing_categories:
        return ALREADY_ANNOTATED_RESULT

    add_category(insight.barcode, insight.value_tag)
    return UPDATED_ANNOTATION_RESULT
def update_product(self, insight: ProductInsight) -> AnnotationResult:
    """Add ``insight.data['brand']`` to a product that has no brand yet.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when ``brands_tags`` is non-empty,
        ``UPDATED_ANNOTATION_RESULT`` after ``add_brand``.
    """
    new_brand: str = insight.data['brand']

    product = get_product(insight.barcode, ['brands_tags'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    existing_brands: List[str] = product.get('brands_tags') or []
    if not existing_brands:
        add_brand(insight.barcode, new_brand)
        return UPDATED_ANNOTATION_RESULT

    # For now, don't annotate if a brand has already been provided
    return ALREADY_ANNOTATED_RESULT
def update_product(self, insight: ProductInsight) -> AnnotationResult:
    """Add ``insight.data['store']`` to the product's stores.

    Presence is checked with the insight's ``value_tag`` against
    ``stores_tags``; the value written is ``insight.data['store']``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the tag is already present,
        ``UPDATED_ANNOTATION_RESULT`` after ``add_store``.
    """
    store_name: str = insight.data['store']
    tag: str = insight.value_tag

    product = get_product(insight.barcode, ['stores_tags'])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # A missing or empty field is treated as "no stores".
    existing_stores: List[str] = product.get('stores_tags') or []
    if tag in existing_stores:
        return ALREADY_ANNOTATED_RESULT

    add_store(insight.barcode, store_name)
    return UPDATED_ANNOTATION_RESULT
def correct_ingredient(
    country: str,
    ingredient: str,
    pattern: str,
    correction: str,
    comment: str,
    auth: OFFAuthentication,
    dry_run: bool = False,
):
    """Apply a pattern-based correction to the ingredient text of products.

    Products come from ``iter_products(country, ingredient)``. Each one is
    fetched again with only the ``ingredients_text_<country>`` field, run
    through ``generate_correction`` and saved when the text changed.

    Args:
        country: used both to build the field name and as the ``lang``
            argument of ``save_ingredients``.
        ingredient: forwarded to ``iter_products``.
        pattern: forwarded to ``get_patterns``.
        correction: forwarded to ``get_patterns``.
        comment: forwarded to ``save_ingredients``.
        auth: forwarded to ``save_ingredients``.
        dry_run: when true, print the changes without saving them.
    """
    if dry_run:
        print("*** Dry run ***")

    field_name = "ingredients_text_{}".format(country)

    # Materialised so the total can be printed before processing starts.
    candidates = list(iter_products(country, ingredient))
    print("{} products".format(len(candidates)))

    compiled_patterns = get_patterns(pattern, correction)

    for candidate in candidates:
        barcode = candidate.get("code")
        base_url = BaseURLProvider().country(country).get()
        print("Fixing {}/product/{}".format(base_url, barcode))

        product = get_product(barcode, fields=[field_name])
        if product is None:
            print("Product not found: {}".format(barcode))
            continue

        current_text = product[field_name]
        corrected_text = generate_correction(current_text, compiled_patterns)

        if current_text == corrected_text:
            print("No modification after correction, skipping")
            continue

        print(current_text)
        print(corrected_text)
        print("-" * 15)

        if dry_run:
            continue

        save_ingredients(
            barcode, corrected_text, lang=country, comment=comment, auth=auth
        )
def update_product(
    self, insight: ProductInsight, auth: Optional[OFFAuthentication] = None
) -> AnnotationResult:
    """Add the insight's ``value`` as a brand of the product.

    Args:
        insight: insight providing the barcode, value, id and server domain.
        auth: optional credentials forwarded to ``add_brand``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``UPDATED_ANNOTATION_RESULT`` after ``add_brand``.
    """
    # The fetched product is only used as an existence check.
    if get_product(insight.barcode, ["brands_tags"]) is None:
        return MISSING_PRODUCT_RESULT

    add_brand(
        insight.barcode,
        insight.value,
        insight_id=insight.id,
        server_domain=insight.server_domain,
        auth=auth,
    )
    return UPDATED_ANNOTATION_RESULT
def spellcheck(self, req: falcon.Request, resp: falcon.Response):
    """Run the spellchecker on a text or on a product's ingredient text.

    Request parameters read: ``text``, ``barcode`` (used when ``text`` is
    absent, reading the product's ``ingredients_text_fr``), ``index``
    (default ``"product_all"``), ``confidence`` (default ``0.5``),
    ``pipeline`` and ``safe``.

    The response media is ``{"status": "not_found"}`` when no text could be
    obtained, otherwise a dict with ``text``, ``corrected`` and
    ``corrections``.

    Raises:
        falcon.HTTPBadRequest: neither ``text`` nor ``barcode`` is given,
            ``pipeline`` and ``safe`` are both supplied, or ``pipeline``
            contains an item not in ``SPELLCHECKERS``.
    """
    text = req.get_param("text")

    if text is None:
        barcode = req.get_param("barcode")
        if barcode is None:
            raise falcon.HTTPBadRequest("text or barcode is required.")

        # A missing product is handled like a product without the field.
        text = (get_product(barcode) or {}).get("ingredients_text_fr")
        if text is None:
            resp.media = {"status": "not_found"}
            return

    index_name = req.get_param("index", default="product_all")
    confidence = req.get_param_as_float("confidence", default=0.5)
    pipeline = req.get_param_as_list("pipeline") or None
    safe = req.get_param_as_bool("safe", blank_as_true=False)

    if pipeline and safe is not None:
        raise falcon.HTTPBadRequest(
            "pipeline and safe parameters cannot be used together"
        )

    if pipeline:
        for step in pipeline:
            if step not in SPELLCHECKERS:
                raise falcon.HTTPBadRequest(f"unknown pipeline item: {step}")
    elif safe:
        pipeline = ["patterns", "percentages", "vocabulary"]

    checker = Spellchecker.load(
        client=es_client,
        pipeline=pipeline,
        index_name=index_name,
        confidence=confidence,
    )
    result = checker.correct(text)

    resp.media = {
        "text": text,
        "corrected": result.latest_correction,
        "corrections": result.corrections,
    }
def update_product(
    self, insight: ProductInsight, auth: Optional[OFFAuthentication] = None
) -> AnnotationResult:
    """Set the product expiration date from ``insight.value`` if it is unset.

    Args:
        insight: insight providing the barcode, value, id and server domain.
        auth: optional credentials forwarded to ``update_expiration_date``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a non-empty
        ``expiration_date``, ``UPDATED_ANNOTATION_RESULT`` after the update.
    """
    product = get_product(insight.barcode, ["expiration_date"])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # Any truthy value counts as an existing expiration date.
    if product.get("expiration_date"):
        return ALREADY_ANNOTATED_RESULT

    update_expiration_date(
        insight.barcode,
        insight.value,
        insight_id=insight.id,
        server_domain=insight.server_domain,
        auth=auth,
    )
    return UPDATED_ANNOTATION_RESULT
def update_product(
    self, insight: ProductInsight, auth: Optional[OFFAuthentication] = None
) -> AnnotationResult:
    """Add the insight's ``value`` to the product's stores.

    Presence is checked with ``insight.value_tag`` against ``stores_tags``.

    Args:
        insight: insight providing the barcode, value, tag, id and domain.
        auth: optional credentials forwarded to ``add_store``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the tag is already present,
        ``UPDATED_ANNOTATION_RESULT`` after ``add_store``.
    """
    product = get_product(insight.barcode, ["stores_tags"])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # A missing or empty field is treated as "no stores".
    existing_stores: List[str] = product.get("stores_tags") or []
    if insight.value_tag in existing_stores:
        return ALREADY_ANNOTATED_RESULT

    add_store(
        insight.barcode,
        insight.value,
        insight_id=insight.id,
        server_domain=insight.server_domain,
        auth=auth,
    )
    return UPDATED_ANNOTATION_RESULT
def get_random_insight(insight_type: str = None,
                       country: str = None) -> Optional[ProductInsight]:
    """Pick a random non-annotated insight whose product still exists.

    At most four candidates are drawn. A candidate whose product cannot be
    fetched is deleted and another one is drawn.

    Args:
        insight_type: when given, restrict candidates to this insight type.
        country: when given, restrict candidates to insights whose
            ``countries`` contains this value.

    Returns:
        The selected insight, or ``None`` when no candidate matches or all
        four attempts hit a missing product.
    """
    for _ in range(4):
        conditions = [ProductInsight.annotation.is_null()]

        if country is not None:
            conditions.append(ProductInsight.countries.contains(country))

        if insight_type is not None:
            conditions.append(ProductInsight.type == insight_type)

        shuffled = (
            ProductInsight.select()
            .where(*conditions)
            .order_by(peewee.fn.Random())
        )
        candidates = list(shuffled.limit(1))

        if not candidates:
            return None

        insight = candidates[0]

        # We only need to know if the product exists, so fetching barcode
        # is enough. Product may be None if not found.
        if get_product(insight.barcode, ['code']):
            return insight

        insight.delete_instance()
        logger.info("Product not found, insight deleted")

    return None
def update_product(
    self, insight: ProductInsight, auth: Optional[OFFAuthentication] = None
) -> AnnotationResult:
    """Set the product quantity from ``insight.value`` if it is unset.

    Args:
        insight: insight providing the barcode, value, id and server domain.
        auth: optional credentials forwarded to ``update_quantity``.

    Returns:
        ``MISSING_PRODUCT_RESULT`` when ``get_product`` returns ``None``,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a non-empty
        ``quantity``, ``UPDATED_ANNOTATION_RESULT`` after the update.
    """
    product = get_product(insight.barcode, ["quantity"])
    if product is None:
        return MISSING_PRODUCT_RESULT

    # Any truthy value counts as an existing quantity.
    if product.get("quantity"):
        return ALREADY_ANNOTATED_RESULT

    update_quantity(
        insight.barcode,
        insight.value,
        insight_id=insight.id,
        server_domain=insight.server_domain,
        auth=auth,
    )
    return UPDATED_ANNOTATION_RESULT
def get_source_image_url(barcode: str) -> Optional[str]:
    """Return a display image of the product, or ``None`` if there is none.

    The ``front``, ``ingredients`` and ``nutrition`` entries of
    ``selected_images`` are tried in that order; the first value found under
    an entry's ``display`` mapping is returned.

    Args:
        barcode: barcode of the product to look up.
    """
    product: Optional[JSONType] = get_product(barcode, fields=["selected_images"])

    if product is None or "selected_images" not in product:
        return None

    selected_images = product["selected_images"]

    for key in ("front", "ingredients", "nutrition"):
        if key not in selected_images:
            continue

        images = selected_images[key]
        if "display" not in images:
            continue

        display_images = list(images["display"].values())
        if display_images:
            return display_images[0]

    return None
def get_source_image_url(barcode: str) -> Optional[str]:
    """Return a front display image of the product, or ``None``.

    Only the ``front`` entry of ``selected_images`` is considered; the first
    value found under its ``display`` mapping is returned.

    Args:
        barcode: barcode of the product to look up.
    """
    product: Optional[JSONType] = get_product(barcode, fields=['selected_images'])

    if product is None or 'selected_images' not in product:
        return None

    selected_images = product['selected_images']
    if 'front' not in selected_images:
        return None

    front_images = selected_images['front']
    if 'display' not in front_images:
        return None

    display_images = list(front_images['display'].values())
    return display_images[0] if display_images else None
def predict_from_barcode(
    self, barcode: str
) -> Optional[List[CategoryPrediction]]:
    """Request a prediction for one product from the serving endpoint.

    Calls ``self.load()`` first when ``self.loaded`` is false, builds the
    model input with ``self.get_input_from_products`` and posts it to
    ``<TF_SERVING_BASE_URL>/<self.NAME>:predict``.

    Args:
        barcode: barcode of the product to look up.

    Returns:
        The decoded JSON body of the HTTP response, or ``None`` (after an
        info log) when ``get_product`` returns ``None``.
        ``raise_for_status`` is called on the response before decoding.
    """
    if not self.loaded:
        self.load()

    product = get_product(barcode, fields=["product_name", "ingredients_tags"])
    if product is None:
        logger.info("Product {} not found".format(barcode))
        return

    # Single-product batch: keep the first (only) entry and make both parts
    # JSON-serialisable.
    features = self.get_input_from_products([product])[0]
    instance = [features[0].tolist(), features[1].tolist()]

    payload = {"signature_name": "serving_default", "instances": [instance]}
    endpoint = "{}/{}:predict".format(TF_SERVING_BASE_URL, self.NAME)

    r = http_session.post(endpoint, json=payload)
    r.raise_for_status()
    return r.json()
# NOTE(review): this is an excerpt; model_dir, model_path and config are
# defined outside the visible code, and the loop body may continue past it.

# Vocabularies are loaded through the project loaders, given model_dir.
category_to_id = load_category_vocabulary(model_dir)
ingredient_to_id = load_ingredient_vocabulary(model_dir)
# Category keys ordered by their associated value in category_to_id.
category_names = [
    category
    for category, _ in sorted(category_to_id.items(), key=operator.itemgetter(1))
]
nlp = get_nlp(config.lang)
product_name_vocabulary = load_product_name_vocabulary(model_dir)
model = keras.models.load_model(str(model_path))

# Interactive loop: read a barcode from stdin, fetch the product and build
# the model input for it. There is no exit condition in the visible code.
while True:
    barcode = input("barcode: ").strip()
    product = get_product(barcode, fields=["product_name", "ingredients_tags"])

    if product is None:
        # Unknown barcode: report it and prompt again.
        print("Product {} not found".format(barcode))
        continue

    X = generate_data(
        product=product,
        ingredient_to_id=ingredient_to_id,
        product_name_token_to_int=product_name_vocabulary,
        nlp=nlp,
        product_name_max_length=config.model_config.product_name_max_length,
        product_name_preprocessing_config=config.product_name_preprocessing_config,
    )