def __init__(self, data: JSONType, lazy: bool = True):
    """Build the annotation attributes from a single OCR response mapping.

    Every top-level key read from ``data`` is optional; an absent key
    leaves the matching attribute at its empty default (``[]``, ``None``
    or ``""``).

    Args:
        data: mapping that may hold ``textAnnotations``,
            ``fullTextAnnotation``, ``logoAnnotations``,
            ``labelAnnotations`` and ``safeSearchAnnotation``.
        lazy: passed through unchanged to ``OCRFullTextAnnotation``.
    """
    parsed_texts = [
        OCRTextAnnotation(raw) for raw in data.get("textAnnotations", [])
    ]
    # Only the first text annotation feeds the two string attributes.
    first_text = parsed_texts[0].text if parsed_texts else ""
    first_text_lower = first_text.lower()

    raw_full_text = data.get("fullTextAnnotation")
    parsed_full_text = (
        OCRFullTextAnnotation(raw_full_text, lazy=lazy)
        if raw_full_text
        else None
    )

    parsed_logos = [
        LogoAnnotation(raw) for raw in data.get("logoAnnotations", [])
    ]
    parsed_labels = [
        LabelAnnotation(raw) for raw in data.get("labelAnnotations", [])
    ]
    parsed_safe_search = (
        SafeSearchAnnotation(data["safeSearchAnnotation"])
        if "safeSearchAnnotation" in data
        else None
    )

    self.text_annotations: List[OCRTextAnnotation] = parsed_texts
    self.full_text_annotation: Optional[OCRFullTextAnnotation] = parsed_full_text
    self.logo_annotations: List[LogoAnnotation] = parsed_logos
    self.label_annotations: List[LabelAnnotation] = parsed_labels
    self.safe_search_annotation: Optional[SafeSearchAnnotation] = parsed_safe_search
    self.text_annotations_str: str = first_text
    self.text_annotations_str_lower: str = first_text_lower
def exist_latent(latent_insight: JSONType) -> bool:
    """Tell whether a ``ProductInsight`` row matches *latent_insight*.

    The match is on ``barcode``, ``type`` and ``server_domain`` (required
    keys) plus ``value_tag``, ``value`` and ``source_image`` (optional keys,
    compared against ``None`` when missing).

    Args:
        latent_insight: mapping describing the insight to look up.

    Returns:
        ``True`` when the count of matching rows is non-zero.
    """
    query = ProductInsight.select()
    conditions = (
        ProductInsight.barcode == latent_insight["barcode"],
        ProductInsight.type == latent_insight["type"],
        ProductInsight.server_domain == latent_insight["server_domain"],
        ProductInsight.value_tag == latent_insight.get("value_tag"),
        ProductInsight.value == latent_insight.get("value"),
        ProductInsight.source_image == latent_insight.get("source_image"),
    )
    match_count = query.where(*conditions).count()
    return bool(match_count)
def __init__(self, data: JSONType):
    """Initialise the instance from a raw symbol mapping.

    Args:
        data: mapping with the required keys ``boundingBox`` and ``text``,
            and the optional keys ``confidence`` and ``property``.
    """
    self.bounding_poly = BoundingPoly(data['boundingBox'])
    self.text = data['text']
    self.confidence = data.get('confidence')

    # The break information is nested under the optional 'property' key.
    properties = data.get('property', {})
    self.symbol_break: Optional[DetectedBreak] = (
        DetectedBreak(properties['detectedBreak'])
        if 'detectedBreak' in properties
        else None
    )
def __init__(self, data: JSONType):
    """Initialise the instance from a raw symbol mapping.

    Args:
        data: mapping with the required keys ``boundingBox`` and ``text``,
            and the optional keys ``confidence`` and ``property``.
    """
    self.bounding_poly = BoundingPoly(data["boundingBox"])
    self.text = data["text"]
    self.confidence = data.get("confidence")

    extra = data.get("property", {})
    has_break = "detectedBreak" in extra
    # Stays None when the symbol carries no break information.
    self.symbol_break: Optional[DetectedBreak] = (
        DetectedBreak(extra["detectedBreak"]) if has_break else None
    )
def __init__(self, product: JSONType):
    """Copy the fields of interest out of a raw product mapping.

    Missing or falsy values are replaced by a default: ``[]`` for the
    ``*_tags`` fields, ``None`` for ``quantity`` and ``expiration_date``,
    ``0`` for ``unique_scans_n`` and ``{}`` for ``images``. ``code`` is
    stored as returned by ``product.get``.

    Args:
        product: raw product mapping.
    """

    def tag_list(field):
        # Falsy values (missing key, None, empty list) collapse to [].
        return product.get(field) or []

    def text_or_none(field):
        # Falsy values (missing key, None, empty string) collapse to None.
        return product.get(field) or None

    self.barcode: Optional[str] = product.get("code")
    self.countries_tags: List[str] = tag_list("countries_tags")
    self.categories_tags: List[str] = tag_list("categories_tags")
    self.emb_codes_tags: List[str] = tag_list("emb_codes_tags")
    self.labels_tags: List[str] = tag_list("labels_tags")
    self.quantity: Optional[str] = text_or_none("quantity")
    self.expiration_date: Optional[str] = text_or_none("expiration_date")
    self.brands_tags: List[str] = tag_list("brands_tags")
    self.stores_tags: List[str] = tag_list("stores_tags")
    self.unique_scans_n: int = product.get("unique_scans_n") or 0
    self.images: JSONType = product.get("images") or {}
def __init__(self, product: JSONType):
    """Copy the fields of interest out of a raw product mapping.

    Missing or falsy values are replaced by a default: ``[]`` for the
    ``*_tags`` fields, ``None`` for ``quantity`` and ``expiration_date``
    and ``0`` for ``unique_scans_n``.

    Args:
        product: raw product mapping.
    """
    fetch = product.get

    self.barcode = fetch("code")

    # Tag lists: anything falsy becomes an empty list.
    self.countries_tags = fetch("countries_tags") or []
    self.categories_tags = fetch("categories_tags") or []
    self.emb_codes_tags = fetch("emb_codes_tags") or []
    self.labels_tags = fetch("labels_tags") or []

    # Free-text fields: anything falsy (e.g. "") becomes None.
    self.quantity = fetch("quantity") or None
    self.expiration_date = fetch("expiration_date") or None

    self.brands_tags = fetch("brands_tags") or []
    self.stores_tags = fetch("stores_tags") or []
    self.unique_scans_n = fetch("unique_scans_n") or 0
def from_dict(cls, data: JSONType) -> 'Taxonomy':
    """Build a ``Taxonomy`` from a mapping of node identifier to node data.

    Two passes are made over ``data``: the first registers one
    ``TaxonomyNode`` per key, the second links each node to its parents, so
    a parent may appear after its child in ``data``.

    Args:
        data: mapping whose values may hold the optional keys ``name`` and
            ``parents`` (an iterable of identifiers that are looked up in
            the taxonomy).

    Returns:
        The populated taxonomy.
    """
    taxonomy = Taxonomy()

    # Pass 1: create the nodes.
    for identifier, node_data in data.items():
        if identifier in taxonomy:
            continue
        taxonomy.add(
            identifier,
            TaxonomyNode(identifier=identifier,
                         names=node_data.get('name', {})),
        )

    # Pass 2: every node exists now, so parent references can be resolved.
    for identifier, node_data in data.items():
        child = taxonomy[identifier]
        parent_nodes = [
            taxonomy[parent_id] for parent_id in node_data.get('parents', [])
        ]
        child.add_parents(parent_nodes)

    return taxonomy
def updated_product_predict_insights(
    barcode: str, product: JSONType, server_domain: str
) -> bool:
    """Import category and product-name insights for an updated product.

    ``add_category_insight`` always runs first. When the product has a
    non-empty ``product_name``, the insights derived from that name are
    imported type by type (non-automatic).

    Args:
        barcode: barcode of the product.
        product: raw product mapping; only ``product_name`` is read here.
        server_domain: forwarded to the category step and to each importer.

    Returns:
        The result of ``add_category_insight``, or ``True`` as soon as one
        importer reports a truthy import result.
    """
    updated = add_category_insight(barcode, product, server_domain)

    product_name = product.get("product_name")
    if product_name:
        product_store = get_product_store()
        insights_by_type = get_insights_from_product_name(barcode, product_name)

        for insight_type, insights in insights_by_type.items():
            importer = InsightImporterFactory.create(insight_type, product_store)
            imported = importer.import_insights(
                [insights], server_domain=server_domain, automatic=False
            )
            if not imported:
                continue
            message = "{} insights ({}) imported for product {}".format(
                imported, insight_type, barcode
            )
            logger.info(message)
            updated = True

    return updated
def updated_product_add_category_insight(barcode: str,
                                         product: JSONType) -> bool:
    """Predict and import a category insight for a product without category.

    ``predict_category_from_product_es`` is tried first; when it returns
    ``None``, ``predict_category_from_product_ml`` is called with
    ``filter_blacklisted=True`` instead.

    Args:
        barcode: barcode of the product, used in log messages only.
        product: raw product mapping.

    Returns:
        ``False`` when the product already has ``categories_tags`` or when
        no prediction is available; otherwise the truthiness of the
        importer result.
    """
    # Products that already carry categories are left untouched.
    if product.get('categories_tags', []):
        return False

    es_insight = predict_category_from_product_es(product)

    if es_insight is not None:
        insights = [es_insight]
    else:
        insights = predict_category_from_product_ml(product,
                                                    filter_blacklisted=True)
        if not insights:
            return False

        predicted = []
        for ml_insight in insights:
            predicted.append("{} ({})".format(ml_insight["category"],
                                              ml_insight["confidence"]))
        logger.info("Predicted categories for product {}: {}"
                    "".format(barcode, predicted))

    product_store = CACHED_PRODUCT_STORE.get()
    importer = InsightImporterFactory.create(InsightType.category.name,
                                             product_store)
    imported = importer.import_insights(insights, automatic=False)

    was_imported = bool(imported)
    if was_imported:
        logger.info("Category insight imported for product {}".format(barcode))
    return was_imported
def __init__(self, product: JSONType):
    """Copy the fields of interest out of a raw product mapping.

    Missing or falsy values become ``[]`` for the ``*_tags`` fields and
    ``None`` for ``quantity`` and ``expiration_date``.

    Args:
        product: raw product mapping.
    """

    def list_field(name):
        # Missing key, None and empty list all yield a fresh empty list.
        return product.get(name) or []

    self.barcode = product.get('code')
    self.countries_tags = list_field('countries_tags')
    self.categories_tags = list_field('categories_tags')
    self.emb_codes_tags = list_field('emb_codes_tags')
    self.labels_tags = list_field('labels_tags')
    self.quantity = product.get('quantity') or None
    self.expiration_date = product.get('expiration_date') or None
    self.brands_tags = list_field('brands_tags')
    self.stores_tags = list_field('stores_tags')
def from_dict(cls, data: JSONType) -> "Taxonomy":
    """Build a ``Taxonomy`` from a mapping of node identifier to node data.

    Nodes are created in a first pass and linked to their parents in a
    second one, so parents need not precede their children in ``data``.

    Args:
        data: mapping whose values may hold the optional keys ``name``,
            ``synonyms`` and ``parents`` (an iterable of identifiers that
            are looked up in the taxonomy).

    Returns:
        The populated taxonomy.
    """
    taxonomy = Taxonomy()

    # Pass 1: register one node per identifier.
    for identifier, node_data in data.items():
        if identifier in taxonomy:
            continue
        new_node = TaxonomyNode(
            identifier=identifier,
            names=node_data.get("name", {}),
            synonyms=node_data.get("synonyms", None),
        )
        taxonomy.add(identifier, new_node)

    # Pass 2: resolve parent identifiers now that all nodes are registered.
    for identifier, node_data in data.items():
        child = taxonomy[identifier]
        parent_ids = node_data.get("parents", [])
        child.add_parents([taxonomy[parent_id] for parent_id in parent_ids])

    return taxonomy
def is_selected_image(product_images: JSONType, image_id: str) -> bool:
    """Tell whether *image_id* is referenced by a selected image entry.

    An entry counts when its key starts with ``nutrition``, ``front`` or
    ``ingredients`` and its ``imgid`` value equals *image_id*.

    Args:
        product_images: mapping of image key to image metadata.
        image_id: identifier compared against each entry's ``imgid``.

    Returns:
        ``True`` on the first matching entry, ``False`` when none matches.
    """
    selected_prefixes = ("nutrition", "front", "ingredients")

    # Prefix-major order decides which prefix is reported in the log line.
    for key_prefix in selected_prefixes:
        for key, image in product_images.items():
            if not key.startswith(key_prefix):
                continue
            if image["imgid"] != image_id:
                continue
            logger.debug("Image {} is a selected image for "
                         "'{}'".format(image_id, key_prefix))
            return True

    return False
def __init__(self, data: JSONType):
    """Initialise the instance from a raw word mapping.

    Args:
        data: mapping with the required keys ``boundingBox`` and
            ``symbols``, and an optional ``property`` mapping that may hold
            ``detectedLanguages``.
    """
    self.bounding_poly = BoundingPoly(data["boundingBox"])
    self.symbols: List[Symbol] = [
        Symbol(raw_symbol) for raw_symbol in data["symbols"]
    ]

    word_property = data.get("property", {})
    # Stays None when the word has no language information.
    self.languages: Optional[List[DetectedLanguage]] = (
        [
            DetectedLanguage(raw_language)
            for raw_language in word_property["detectedLanguages"]
        ]
        if "detectedLanguages" in word_property
        else None
    )
def print_generic_insight(insight: JSONType) -> None:
    """Echo every field of *insight*, then its product URL and image URL.

    The image line is printed only when the insight has a ``source`` key.
    A blank line closes the output.

    Args:
        insight: insight mapping; ``barcode`` is required.
    """
    for field_name, field_value in insight.items():
        line = "{}: {}".format(field_name, str(field_value))
        click.echo(line)

    base_url = settings.BaseURLProvider().get()
    click.echo("url: {}/product/{}".format(base_url, insight["barcode"]))

    if "source" in insight:
        image_line = "image: {}{}".format(settings.OFF_IMAGE_BASE_URL,
                                          insight["source"])
        click.echo(image_line)

    click.echo("")
def print_generic_insight(insight: JSONType) -> None:
    """Echo every field of *insight*, then its product URL and image URL.

    The product URL is built on the fixed
    ``https://fr.openfoodfacts.org/produit/`` prefix. The image line is
    printed only when the insight has a ``source`` key. A blank line closes
    the output.

    Args:
        insight: insight mapping; ``barcode`` is required.
    """
    for field_name, field_value in insight.items():
        click.echo("{}: {}".format(field_name, str(field_value)))

    product_url = "https://fr.openfoodfacts.org/produit/{}".format(
        insight["barcode"])
    click.echo("url: {}".format(product_url))

    if "source" in insight:
        image_url = "{}{}".format(STATIC_IMAGE_DIR_URL, insight["source"])
        click.echo("image: {}".format(image_url))

    click.echo("")
def print_ingredient_spellcheck_insight(insight: JSONType) -> None:
    """Echo a spellcheck insight: header fields, product URL and diff.

    The header shows ``id``, ``type``, ``barcode`` and ``countries``
    (``None`` is printed for a missing one). The diff line is whatever
    ``generate_colored_diff`` returns for the original and corrected
    snippets. A blank line closes the output.

    Args:
        insight: insight mapping; ``barcode``, ``original_snippet`` and
            ``corrected_snippet`` are required.
    """
    header_fields = ("id", "type", "barcode", "countries")
    for field_name in header_fields:
        click.echo("{}: {}".format(field_name, str(insight.get(field_name))))

    base_url = settings.BaseURLProvider().get()
    click.echo("url: {}/product/{}".format(base_url, insight["barcode"]))

    before = insight["original_snippet"]
    after = insight["corrected_snippet"]
    click.echo(generate_colored_diff(before, after))
    click.echo("")
def print_ingredient_spellcheck_insight(insight: JSONType) -> None:
    """Echo a spellcheck insight: header fields, product URL and diff.

    The header shows ``id``, ``type``, ``barcode`` and ``countries``
    (``None`` is printed for a missing one). The URL uses the fixed
    ``https://fr.openfoodfacts.org/produit/`` prefix. The diff line is
    whatever ``generate_colored_diff`` returns for the two snippets. A
    blank line closes the output.

    Args:
        insight: insight mapping; ``barcode``, ``original_snippet`` and
            ``corrected_snippet`` are required.
    """
    for field_name in ('id', 'type', 'barcode', 'countries'):
        field_value = insight.get(field_name)
        header_line = '{}: {}'.format(field_name, str(field_value))
        click.echo(header_line)

    product_url = "https://fr.openfoodfacts.org/produit/{}".format(
        insight['barcode'])
    click.echo("url: {}".format(product_url))

    colored_diff = generate_colored_diff(insight['original_snippet'],
                                         insight['corrected_snippet'])
    click.echo(colored_diff)
    click.echo("")
def print_ingredient_spellcheck_insight(insight: JSONType) -> None:
    """Echo a spellcheck insight: header fields, product URL and diff.

    The header shows ``id``, ``type``, ``barcode`` and ``countries``
    (``None`` is printed for a missing one). The URL uses the fixed
    ``https://fr.openfoodfacts.org/produit/`` prefix. The diff line is
    whatever ``generate_colored_diff`` returns for the two snippets. A
    blank line closes the output.

    Args:
        insight: insight mapping; ``barcode``, ``original_snippet`` and
            ``corrected_snippet`` are required.
    """
    header_fields = ("id", "type", "barcode", "countries")
    for field_name in header_fields:
        click.echo("{}: {}".format(field_name, str(insight.get(field_name))))

    barcode = insight["barcode"]
    product_url = "https://fr.openfoodfacts.org/produit/{}".format(barcode)
    click.echo("url: {}".format(product_url))

    before = insight["original_snippet"]
    after = insight["corrected_snippet"]
    click.echo(generate_colored_diff(before, after))
    click.echo("")
def from_json(cls, data: JSONType) -> Optional['OCRResult']:
    """Build an ``OCRResult`` from the first entry of ``data['responses']``.

    Args:
        data: mapping that may hold a ``responses`` sequence.

    Returns:
        ``None`` when ``responses`` is missing or empty, or when the first
        response contains an ``error`` key; an ``OCRResult`` otherwise.
    """
    responses = data.get('responses', [])
    if responses:
        first_response = responses[0]
        if 'error' not in first_response:
            return OCRResult(first_response)
    return None
def __init__(self, data: JSONType):
    """Initialise the instance from a raw word mapping.

    Args:
        data: mapping with the required keys ``boundingBox`` and
            ``symbols``, and an optional ``property`` mapping that may hold
            ``detectedLanguages``.
    """
    self.bounding_poly = BoundingPoly(data['boundingBox'])
    self.symbols: List[Symbol] = [Symbol(s) for s in data['symbols']]
    # Declared once as Optional; it stays None when the word carries no
    # language information. A second, narrower annotation on the assignment
    # below would conflict with this declaration for type checkers.
    self.languages: Optional[List[DetectedLanguage]] = None

    word_property = data.get('property', {})
    if 'detectedLanguages' in word_property:
        self.languages = [
            DetectedLanguage(language_data)
            for language_data in word_property['detectedLanguages']
        ]
def __init__(self, data: JSONType):
    """Build the annotation attributes from a single OCR response mapping.

    Every top-level key read from ``data`` is optional. The two string
    attributes hold the texts of all text annotations joined with ``||``
    (and its lower-cased form), or ``None`` when there is no text
    annotation.

    Args:
        data: mapping that may hold ``textAnnotations``,
            ``fullTextAnnotation``, ``logoAnnotations``,
            ``labelAnnotations`` and ``safeSearchAnnotation``.
    """
    parsed_texts = [
        OCRTextAnnotation(raw) for raw in data.get('textAnnotations', [])
    ]
    if parsed_texts:
        joined_text = '||'.join(item.text for item in parsed_texts)
        joined_text_lower = joined_text.lower()
    else:
        joined_text = None
        joined_text_lower = None

    raw_full_text = data.get('fullTextAnnotation')
    parsed_full_text = (
        OCRFullTextAnnotation(raw_full_text) if raw_full_text else None
    )

    parsed_logos = [
        LogoAnnotation(raw) for raw in data.get('logoAnnotations', [])
    ]
    parsed_labels = [
        LabelAnnotation(raw) for raw in data.get('labelAnnotations', [])
    ]
    parsed_safe_search = (
        SafeSearchAnnotation(data['safeSearchAnnotation'])
        if 'safeSearchAnnotation' in data
        else None
    )

    self.text_annotations: List[OCRTextAnnotation] = parsed_texts
    self.full_text_annotation: Optional[OCRFullTextAnnotation] = parsed_full_text
    self.logo_annotations: List[LogoAnnotation] = parsed_logos
    self.label_annotations: List[LabelAnnotation] = parsed_labels
    self.safe_search_annotation: Optional[SafeSearchAnnotation] = parsed_safe_search
    self.text_annotations_str: Optional[str] = joined_text
    self.text_annotations_str_lower: Optional[str] = joined_text_lower
def find_nutrition_image_nutrient_languages(
    mentions: JSONType,
) -> Dict[str, Dict[str, int]]:
    """Map each nutrient to the distinct languages found in its matches.

    Args:
        mentions: mapping of nutrient name to an iterable of match
            mappings; each match may hold an optional ``languages`` list.

    Returns:
        ``{nutrient: {lang: 1, ...}}``. A language is counted at most once
        per nutrient, so every stored count is 1. Languages keep their
        first-seen order, and nutrients whose matches carry no language are
        left out.
    """
    languages: Dict[str, Dict[str, int]] = {}

    for nutrient, matches in mentions.items():
        # dict.fromkeys drops duplicates while keeping first-seen order.
        distinct_langs = dict.fromkeys(
            lang for match in matches for lang in match.get("languages", [])
        )
        if distinct_langs:
            languages[nutrient] = {lang: 1 for lang in distinct_langs}

    return languages
def updated_product_predict_insights(barcode: str, product: JSONType,
                                     server_domain: str) -> bool:
    """Import category and product-name insights for an updated product.

    ``add_category_insight`` always runs first. When the product has a
    non-empty ``product_name``, predictions derived from that name are
    imported with ``automatic=False``.

    Args:
        barcode: barcode of the product.
        product: raw product mapping; only ``product_name`` is read here.
        server_domain: forwarded to both import steps.

    Returns:
        The result of ``add_category_insight``, or ``True`` when the
        product-name import reports a truthy result.
    """
    updated = add_category_insight(barcode, product, server_domain)

    product_name = product.get("product_name")
    if not product_name:
        # Nothing to predict from without a name.
        return updated

    logger.info("Generating predictions from product name...")
    name_predictions = get_predictions_from_product_name(barcode, product_name)
    imported = import_insights(name_predictions, server_domain, automatic=False)
    logger.info(f"{imported} insights imported for product {barcode}")

    return True if imported else updated
def is_recent_image(product_images: JSONType, image_id: str,
                    max_timedelta: datetime.timedelta) -> bool:
    """Tell whether image *image_id* is recent relative to the other images.

    Only entries whose key is made of digits are considered. Their
    ``uploaded_t`` value is converted with ``int`` and read as a UTC
    timestamp.

    Args:
        product_images: mapping of image key to image metadata.
        image_id: key of the image being checked.
        max_timedelta: largest tolerated delay between the checked image
            and a later upload.

    Returns:
        ``True`` when there is no other numeric-keyed image, or when no
        other image was uploaded more than *max_timedelta* after the
        checked one; ``False`` otherwise.

    Raises:
        ValueError: if other images exist but *image_id* is not among the
            numeric keys.
    """
    upload_datetimes = []
    insight_image_upload_datetime: Optional[datetime.datetime] = None

    for key, image_meta in product_images.items():
        if not key.isdigit():
            continue

        # Same naive-UTC value as the deprecated ``utcfromtimestamp``.
        upload_datetime = datetime.datetime.fromtimestamp(
            int(image_meta["uploaded_t"]), tz=datetime.timezone.utc
        ).replace(tzinfo=None)

        if key == image_id:
            insight_image_upload_datetime = upload_datetime
        else:
            upload_datetimes.append(upload_datetime)

    # This check comes first on purpose: with no other image the answer is
    # True even when the target image itself was not found.
    if not upload_datetimes:
        logger.debug("No other images")
        return True

    if insight_image_upload_datetime is None:
        raise ValueError("Image with ID {} not found".format(image_id))

    for upload_datetime in upload_datetimes:
        if upload_datetime - insight_image_upload_datetime > max_timedelta:
            logger.debug("More recent image: {} > {}".format(
                upload_datetime, insight_image_upload_datetime))
            return False

    sorted_datetimes = [
        str(x)
        for x in sorted(set(x.date() for x in upload_datetimes), reverse=True)
    ]
    logger.debug(
        "All images were uploaded the same day or before the target "
        "image:\n{} >= {}".format(insight_image_upload_datetime.date(),
                                  ", ".join(sorted_datetimes)))
    return True
def is_special_image(images: JSONType, image_path: str, image_type: str,
                     lang: Optional[str] = None) -> bool:
    """Tell whether the image at *image_path* is selected as *image_type*.

    The image ID is the stem of *image_path*. An entry of ``images``
    matches when its key starts with *image_type* and the string form of
    its ``imgid`` equals that ID; when *lang* is given the key must also
    end with ``_<lang>``.

    Args:
        images: mapping of image key to image metadata.
        image_path: path of the image; only its stem is used.
        image_type: required key prefix.
        lang: optional language code the key must end with.

    Returns:
        ``False`` when ``is_valid_image`` rejects the image or when no
        entry matches, ``True`` otherwise.
    """
    if not is_valid_image(images, image_path):
        return False

    image_id = pathlib.Path(image_path).stem
    lang_suffix = None if lang is None else "_{}".format(lang)

    for image_key, image_data in images.items():
        if not image_key.startswith(image_type):
            continue
        if str(image_data.get("imgid")) != image_id:
            continue
        # Without a language constraint any matching entry is enough.
        if lang_suffix is None or image_key.endswith(lang_suffix):
            return True

    return False
def from_json(cls, data: JSONType, **kwargs) -> Optional["OCRResult"]:
    """Build an ``OCRResult`` from the first entry of ``data["responses"]``.

    Args:
        data: mapping that may hold a ``responses`` sequence.
        **kwargs: forwarded unchanged to ``OCRResult``.

    Returns:
        ``None`` when ``responses`` is missing or empty, or when the first
        response contains an ``error`` key (the error is logged); an
        ``OCRResult`` otherwise.

    Raises:
        OCRParsingException: if building the ``OCRResult`` raises any
            ``Exception``; the original exception is chained as the cause.
    """
    responses = data.get("responses", [])

    if not responses:
        return None

    # ``responses`` is non-empty here, so index 0 always exists; the former
    # ``except IndexError`` guard around this lookup could never trigger.
    response = responses[0]

    if "error" in response:
        logger.info("error in OCR response: " "{}".format(response["error"]))
        return None

    try:
        return OCRResult(response, **kwargs)
    except Exception as e:
        raise OCRParsingException("error during OCR parsing") from e
def from_json(cls, data: JSONType) -> Optional['OCRResult']:
    """Build an ``OCRResult`` from the first entry of ``data['responses']``.

    Args:
        data: mapping that may hold a ``responses`` sequence.

    Returns:
        ``None`` when ``responses`` is missing or empty, or when the first
        response contains an ``error`` key (the error is logged); an
        ``OCRResult`` otherwise.

    Raises:
        OCRParsingException: if building the ``OCRResult`` raises any
            ``Exception``; the original exception is chained as the cause.
    """
    responses = data.get('responses', [])

    if not responses:
        return None

    # ``responses`` is non-empty here, so index 0 always exists; the former
    # ``except IndexError`` guard around this lookup could never trigger.
    response = responses[0]

    if 'error' in response:
        logger.info("error in OCR response: "
                    "{}".format(response['error']))
        return None

    try:
        return OCRResult(response)
    except Exception as e:
        raise OCRParsingException("error during OCR parsing") from e
def updated_product_add_category_insight(barcode: str,
                                         product: JSONType) -> bool:
    """Predict and import a category insight for a product without category.

    Args:
        barcode: barcode of the product, used in the log message only.
        product: raw product mapping.

    Returns:
        ``False`` when the product already has ``categories_tags`` or when
        ``predict_from_product`` returns ``None``; otherwise the truthiness
        of the importer result.
    """
    already_categorized = product.get('categories_tags', [])
    if already_categorized:
        return False

    predicted_insight = predict_from_product(product)
    if predicted_insight is None:
        return False

    importer = InsightImporterFactory.create(InsightType.category.name,
                                             CACHED_PRODUCT_STORE.get())
    imported = importer.import_insights([predicted_insight], automatic=False)

    was_imported = bool(imported)
    if was_imported:
        logger.info("Category insight imported for product {}".format(barcode))
    return was_imported
def __init__(self, data: JSONType):
    """Initialise the instance from a raw annotation mapping.

    Args:
        data: mapping with the required keys ``score`` and ``description``
            and the optional key ``mid``.
    """
    raw_mid = data.get("mid")
    # A missing or falsy ``mid`` (e.g. an empty string) is stored as None.
    self.id = raw_mid if raw_mid else None
    self.score, self.description = data["score"], data["description"]
def __init__(self, data: JSONType):
    """Initialise the instance from a raw text annotation mapping.

    Args:
        data: mapping with the required keys ``description`` and
            ``boundingPoly`` and the optional key ``locale``.
    """
    locale = data.get("locale")
    description = data["description"]
    polygon = BoundingPoly(data["boundingPoly"])

    self.locale = locale
    # The annotation's ``description`` is exposed as ``text``.
    self.text = description
    self.bounding_poly = polygon