Example #1
0
    def __init__(self, data: JSONType, lazy: bool = True):
        """Parse the annotation sections of a response mapping.

        Every section is optional: a missing list section yields an empty
        list, a missing full-text or safe-search section yields ``None``.

        :param data: mapping that may hold ``textAnnotations``,
            ``fullTextAnnotation``, ``logoAnnotations``,
            ``labelAnnotations`` and ``safeSearchAnnotation``
        :param lazy: forwarded unchanged to ``OCRFullTextAnnotation``
        """
        self.text_annotations: List[OCRTextAnnotation] = [
            OCRTextAnnotation(raw) for raw in data.get("textAnnotations", [])
        ]

        # Only the first text annotation feeds the plain-text shortcuts;
        # they stay empty strings when there is no annotation at all.
        first_text = (
            self.text_annotations[0].text if self.text_annotations else ""
        )
        self.text_annotations_str: str = first_text
        self.text_annotations_str_lower: str = first_text.lower()

        full_text_data = data.get("fullTextAnnotation")
        self.full_text_annotation: Optional[OCRFullTextAnnotation] = (
            OCRFullTextAnnotation(full_text_data, lazy=lazy)
            if full_text_data
            else None
        )

        self.logo_annotations: List[LogoAnnotation] = [
            LogoAnnotation(raw) for raw in data.get("logoAnnotations", [])
        ]
        self.label_annotations: List[LabelAnnotation] = [
            LabelAnnotation(raw) for raw in data.get("labelAnnotations", [])
        ]

        self.safe_search_annotation: Optional[SafeSearchAnnotation] = (
            SafeSearchAnnotation(data["safeSearchAnnotation"])
            if "safeSearchAnnotation" in data
            else None
        )
Example #2
0
def exist_latent(latent_insight: JSONType) -> bool:
    """Tell whether a stored insight matches the given latent insight.

    The lookup compares barcode, type, server domain, value tag, value and
    source image. ``barcode``, ``type`` and ``server_domain`` are read with
    item access; the three other fields default to ``None`` when missing.

    :param latent_insight: mapping describing the insight to look up
    :return: ``True`` if at least one matching ``ProductInsight`` exists
    :raises KeyError: if ``barcode``, ``type`` or ``server_domain`` is absent
    """
    matching = ProductInsight.select().where(
        ProductInsight.barcode == latent_insight["barcode"],
        ProductInsight.type == latent_insight["type"],
        ProductInsight.server_domain == latent_insight["server_domain"],
        ProductInsight.value_tag == latent_insight.get("value_tag"),
        ProductInsight.value == latent_insight.get("value"),
        ProductInsight.source_image == latent_insight.get("source_image"),
    )
    # Only existence matters, so avoid counting every matching row.
    # NOTE(review): relies on the query object exposing ``exists()`` (as
    # peewee select queries do) - confirm against the ORM in use.
    return matching.exists()
Example #3
0
    def __init__(self, data: JSONType):
        """Read the geometry, text, confidence and optional break of a symbol.

        :param data: mapping where ``boundingBox`` and ``text`` are required
            while ``confidence`` and ``property`` are optional
        """
        self.bounding_poly = BoundingPoly(data['boundingBox'])
        self.text = data['text']
        self.confidence = data.get('confidence')

        # The break stays None unless the nested property declares one.
        properties = data.get('property', {})
        self.symbol_break: Optional[DetectedBreak] = (
            DetectedBreak(properties['detectedBreak'])
            if 'detectedBreak' in properties
            else None
        )
Example #4
0
    def __init__(self, data: JSONType):
        """Populate the symbol from its raw mapping.

        ``boundingBox`` and ``text`` must be present; ``confidence`` and
        ``property`` may be omitted.

        :param data: raw symbol mapping
        """
        self.bounding_poly = BoundingPoly(data["boundingBox"])
        self.text = data["text"]
        self.confidence = data.get("confidence")

        extra = data.get("property", {})
        has_break = "detectedBreak" in extra
        # No detected break in the property mapping means no break at all.
        self.symbol_break: Optional[DetectedBreak] = (
            DetectedBreak(extra["detectedBreak"]) if has_break else None
        )
Example #5
0
 def __init__(self, product: JSONType):
     """Copy the fields of interest out of a raw product mapping.

     Missing or falsy values are normalised: tag lists become ``[]``,
     ``quantity`` and ``expiration_date`` become ``None``,
     ``unique_scans_n`` becomes ``0`` and ``images`` becomes ``{}``.
     ``barcode`` is read from ``code`` without normalisation.

     :param product: raw product mapping
     """
     def tag_list(field):
         # Absent, None and empty values all collapse to a new empty list.
         return product.get(field) or []

     def text_or_none(field):
         # Empty strings are treated like missing values.
         return product.get(field) or None

     self.barcode: Optional[str] = product.get("code")
     self.countries_tags: List[str] = tag_list("countries_tags")
     self.categories_tags: List[str] = tag_list("categories_tags")
     self.emb_codes_tags: List[str] = tag_list("emb_codes_tags")
     self.labels_tags: List[str] = tag_list("labels_tags")
     self.quantity: Optional[str] = text_or_none("quantity")
     self.expiration_date: Optional[str] = text_or_none("expiration_date")
     self.brands_tags: List[str] = tag_list("brands_tags")
     self.stores_tags: List[str] = tag_list("stores_tags")
     self.unique_scans_n: int = product.get("unique_scans_n") or 0
     self.images: JSONType = product.get("images") or {}
Example #6
0
 def __init__(self, product: JSONType):
     """Extract the tracked fields from a raw product mapping.

     Tag fields default to an empty list, ``quantity`` and
     ``expiration_date`` to ``None`` and ``unique_scans_n`` to ``0``
     whenever the stored value is missing or falsy.

     :param product: raw product mapping
     """
     def tag_list(field):
         # A missing, None or empty value yields a new empty list.
         return product.get(field) or []

     def text_or_none(field):
         return product.get(field) or None

     self.barcode = product.get("code")
     self.countries_tags = tag_list("countries_tags")
     self.categories_tags = tag_list("categories_tags")
     self.emb_codes_tags = tag_list("emb_codes_tags")
     self.labels_tags = tag_list("labels_tags")
     self.quantity = text_or_none("quantity")
     self.expiration_date = text_or_none("expiration_date")
     self.brands_tags = tag_list("brands_tags")
     self.stores_tags = tag_list("stores_tags")
     self.unique_scans_n = product.get("unique_scans_n") or 0
Example #7
0
    def from_dict(cls, data: JSONType) -> 'Taxonomy':
        """Build a taxonomy from a mapping of node identifiers to node data.

        Nodes are created in a first pass and linked to their parents in a
        second one, so a node may reference a parent that appears later in
        ``data``.

        :param data: mapping whose values may hold a ``name`` mapping and a
            ``parents`` list of identifiers
        :return: the populated taxonomy
        """
        result = Taxonomy()

        # Pass 1: register every identifier that is not known yet.
        for identifier, payload in data.items():
            if identifier in result:
                continue

            result.add(
                identifier,
                TaxonomyNode(identifier=identifier,
                             names=payload.get('name', {})),
            )

        # Pass 2: every node exists now, so parents can be looked up.
        for identifier, payload in data.items():
            child = result[identifier]
            parent_nodes = []

            for parent_id in payload.get('parents', []):
                parent_nodes.append(result[parent_id])

            child.add_parents(parent_nodes)

        return result
Example #8
0
def updated_product_predict_insights(
    barcode: str, product: JSONType, server_domain: str
) -> bool:
    """Generate and import insights for an updated product.

    ``add_category_insight`` is always called first. When the product has a
    non-empty ``product_name``, insights derived from that name are then
    imported per insight type with ``automatic=False``.

    :param barcode: barcode of the product
    :param product: raw product mapping
    :param server_domain: forwarded to the category step and the importers
    :return: the result of ``add_category_insight``, or ``True`` as soon as
        one importer reports imported insights
    """
    updated = add_category_insight(barcode, product, server_domain)
    name = product.get("product_name")

    if not name:
        return updated

    store = get_product_store()
    insights_by_type = get_insights_from_product_name(barcode, name)

    for insight_type, insights in insights_by_type.items():
        importer = InsightImporterFactory.create(insight_type, store)
        imported = importer.import_insights(
            [insights], server_domain=server_domain, automatic=False
        )

        if not imported:
            continue

        message = "{} insights ({}) imported for product {}".format(
            imported, insight_type, barcode
        )
        logger.info(message)
        updated = True

    return updated
Example #9
0
def updated_product_add_category_insight(barcode: str,
                                         product: JSONType) -> bool:
    """Predict and import a category insight for an uncategorised product.

    Nothing is done when the product already has ``categories_tags``.
    Otherwise ``predict_category_from_product_es`` is tried first and
    ``predict_category_from_product_ml`` is used as a fallback when the
    former returns ``None``.

    :param barcode: barcode of the product, used for logging
    :param product: raw product mapping
    :return: ``True`` if the importer reported imported insights
    """
    if product.get('categories_tags', []):
        return False

    single_insight = predict_category_from_product_es(product)

    if single_insight is not None:
        insights = [single_insight]
    else:
        insights = predict_category_from_product_ml(product,
                                                    filter_blacklisted=True)

        if not insights:
            return False

        predicted = []
        for candidate in insights:
            predicted.append("{} ({})".format(candidate["category"],
                                              candidate["confidence"]))

        logger.info("Predicted categories for product {}: {}"
                    "".format(barcode, predicted))

    product_store = CACHED_PRODUCT_STORE.get()
    importer = InsightImporterFactory.create(InsightType.category.name,
                                             product_store)
    imported = importer.import_insights(insights, automatic=False)

    if imported:
        logger.info("Category insight imported for product {}".format(barcode))

    return bool(imported)
Example #10
0
 def __init__(self, product: JSONType):
     """Pick the tracked fields out of a raw product mapping.

     Tag fields fall back to an empty list and ``quantity`` /
     ``expiration_date`` fall back to ``None`` when the stored value is
     missing or falsy.

     :param product: raw product mapping
     """
     lookup = product.get

     self.barcode = lookup('code')
     self.countries_tags = lookup('countries_tags') or []
     self.categories_tags = lookup('categories_tags') or []
     self.emb_codes_tags = lookup('emb_codes_tags') or []
     self.labels_tags = lookup('labels_tags') or []
     # Empty strings are stored as None for the two free-text fields.
     self.quantity = lookup('quantity') or None
     self.expiration_date = lookup('expiration_date') or None
     self.brands_tags = lookup('brands_tags') or []
     self.stores_tags = lookup('stores_tags') or []
Example #11
0
    def from_dict(cls, data: JSONType) -> "Taxonomy":
        """Build a taxonomy from a mapping of node identifiers to node data.

        All nodes are registered before any parent link is created, so the
        order of the entries in ``data`` does not matter for parent lookup.

        :param data: mapping whose values may hold ``name``, ``synonyms``
            and ``parents`` entries
        :return: the populated taxonomy
        """
        result = Taxonomy()

        # Pass 1: create the nodes that are not registered yet.
        for identifier, payload in data.items():
            if identifier in result:
                continue

            new_node = TaxonomyNode(
                identifier=identifier,
                names=payload.get("name", {}),
                synonyms=payload.get("synonyms", None),
            )
            result.add(identifier, new_node)

        # Pass 2: attach each node to its already-registered parents.
        for identifier, payload in data.items():
            child = result[identifier]
            parent_nodes = []

            for parent_id in payload.get("parents", []):
                parent_nodes.append(result[parent_id])

            child.add_parents(parent_nodes)

        return result
Example #12
0
def is_selected_image(product_images: JSONType, image_id: str) -> bool:
    """Tell whether an image is referenced by a selected-image entry.

    An entry counts when its key starts with ``nutrition``, ``front`` or
    ``ingredients`` and its ``imgid`` equals ``image_id``.

    :param product_images: mapping of image keys to image metadata
    :param image_id: identifier to look for
    :return: ``True`` on the first matching entry, ``False`` otherwise
    :raises KeyError: if an entry with a matching prefix has no ``imgid``
    """
    for selected_type in ("nutrition", "front", "ingredients"):
        for image_key, image_meta in product_images.items():
            # ``imgid`` is only read for keys carrying the current prefix.
            if (image_key.startswith(selected_type)
                    and image_meta["imgid"] == image_id):
                logger.debug("Image {} is a selected image for "
                             "'{}'".format(image_id, selected_type))
                return True

    return False
Example #13
0
    def __init__(self, data: JSONType):
        """Read the geometry, symbols and optional languages of a word.

        :param data: mapping where ``boundingBox`` and ``symbols`` are
            required and ``property`` is optional
        """
        self.bounding_poly = BoundingPoly(data["boundingBox"])
        self.symbols: List[Symbol] = [
            Symbol(raw_symbol) for raw_symbol in data["symbols"]
        ]

        # Languages stay None unless the property mapping lists some.
        word_property = data.get("property", {})
        self.languages: Optional[List[DetectedLanguage]] = (
            [
                DetectedLanguage(raw_language)
                for raw_language in word_property["detectedLanguages"]
            ]
            if "detectedLanguages" in word_property
            else None
        )
Example #14
0
def print_generic_insight(insight: JSONType) -> None:
    """Echo every field of an insight, then its product and image URLs.

    The image line is only printed when the insight has a ``source`` key.
    A blank line closes the output.

    :param insight: insight mapping; must contain ``barcode``
    """
    for field, field_value in insight.items():
        click.echo("{}: {}".format(field, str(field_value)))

    base_url = settings.BaseURLProvider().get()
    click.echo("url: {}/product/{}".format(base_url, insight["barcode"]))

    if "source" in insight:
        image_line = "image: {}{}".format(settings.OFF_IMAGE_BASE_URL,
                                          insight["source"])
        click.echo(image_line)

    click.echo("")
Example #15
0
def print_generic_insight(insight: JSONType) -> None:
    """Echo every field of an insight, then its product and image URLs.

    The image line is only printed when the insight has a ``source`` key.
    A blank line closes the output.

    :param insight: insight mapping; must contain ``barcode``
    """
    for field, field_value in insight.items():
        click.echo("{}: {}".format(field, str(field_value)))

    product_url = "https://fr.openfoodfacts.org/produit/{}".format(
        insight["barcode"])
    click.echo("url: {}".format(product_url))

    if "source" in insight:
        image_line = "image: {}{}".format(STATIC_IMAGE_DIR_URL,
                                          insight["source"])
        click.echo(image_line)

    click.echo("")
Example #16
0
def print_ingredient_spellcheck_insight(insight: JSONType) -> None:
    """Echo a spellcheck insight: key fields, product URL and colored diff.

    ``id``, ``type``, ``barcode`` and ``countries`` are printed even when
    missing (as ``None``). A blank line closes the output.

    :param insight: insight mapping; must contain ``barcode``,
        ``original_snippet`` and ``corrected_snippet``
    """
    for field in ("id", "type", "barcode", "countries"):
        click.echo("{}: {}".format(field, str(insight.get(field))))

    base_url = settings.BaseURLProvider().get()
    click.echo("url: {}/product/{}".format(base_url, insight["barcode"]))

    diff = generate_colored_diff(insight["original_snippet"],
                                 insight["corrected_snippet"])
    click.echo(diff)
    click.echo("")
Example #17
0
def print_ingredient_spellcheck_insight(insight: JSONType) -> None:
    """Echo a spellcheck insight: key fields, product URL and colored diff.

    ``id``, ``type``, ``barcode`` and ``countries`` are printed even when
    missing (as ``None``). A blank line closes the output.

    :param insight: insight mapping; must contain ``barcode``,
        ``original_snippet`` and ``corrected_snippet``
    """
    for field in ('id', 'type', 'barcode', 'countries'):
        click.echo('{}: {}'.format(field, str(insight.get(field))))

    product_url = "https://fr.openfoodfacts.org/produit/{}".format(
        insight['barcode'])
    click.echo("url: {}".format(product_url))

    diff = generate_colored_diff(insight['original_snippet'],
                                 insight['corrected_snippet'])
    click.echo(diff)
    click.echo("")
Example #18
0
def print_ingredient_spellcheck_insight(insight: JSONType) -> None:
    """Echo a spellcheck insight: key fields, product URL and colored diff.

    ``id``, ``type``, ``barcode`` and ``countries`` are printed even when
    missing (as ``None``). A blank line closes the output.

    :param insight: insight mapping; must contain ``barcode``,
        ``original_snippet`` and ``corrected_snippet``
    """
    for field in ("id", "type", "barcode", "countries"):
        click.echo("{}: {}".format(field, str(insight.get(field))))

    product_url = "https://fr.openfoodfacts.org/produit/{}".format(
        insight["barcode"])
    click.echo("url: {}".format(product_url))

    diff = generate_colored_diff(insight["original_snippet"],
                                 insight["corrected_snippet"])
    click.echo(diff)
    click.echo("")
Example #19
0
    def from_json(cls, data: JSONType) -> Optional['OCRResult']:
        """Create an ``OCRResult`` from the first response of a payload.

        :param data: mapping that may hold a ``responses`` list
        :return: the parsed result, or ``None`` when there is no response
            or when the first response contains an ``error`` key
        """
        responses = data.get('responses', [])

        if not responses:
            return None

        # Only the first response is considered.
        first_response = responses[0]
        return None if 'error' in first_response else OCRResult(first_response)
Example #20
0
    def __init__(self, data: JSONType):
        """Read the geometry, symbols and optional languages of a word.

        :param data: mapping where ``boundingBox`` and ``symbols`` are
            required and ``property`` is optional
        """
        self.bounding_poly = BoundingPoly(data['boundingBox'])
        self.symbols: List[Symbol] = [Symbol(s) for s in data['symbols']]

        # Annotated once, as Optional: the attribute stays None unless the
        # property mapping lists detected languages. Re-annotating it with a
        # narrower type on assignment is a redefinition for type checkers.
        self.languages: Optional[List[DetectedLanguage]] = None
        word_property = data.get('property', {})

        if 'detectedLanguages' in word_property:
            self.languages = [
                DetectedLanguage(language)
                for language in word_property['detectedLanguages']
            ]
Example #21
0
    def __init__(self, data: JSONType):
        """Parse the annotation sections of a response mapping.

        Every section is optional: a missing list section yields an empty
        list, a missing full-text or safe-search section yields ``None``.

        :param data: mapping that may hold ``textAnnotations``,
            ``fullTextAnnotation``, ``logoAnnotations``,
            ``labelAnnotations`` and ``safeSearchAnnotation``
        """
        self.text_annotations: List[OCRTextAnnotation] = [
            OCRTextAnnotation(raw) for raw in data.get('textAnnotations', [])
        ]

        # The joined text (and its lowercase form) only exists when at
        # least one text annotation was found; otherwise both stay None.
        joined_text = (
            '||'.join(annotation.text for annotation in self.text_annotations)
            if self.text_annotations
            else None
        )
        self.text_annotations_str: Optional[str] = joined_text
        self.text_annotations_str_lower: Optional[str] = (
            joined_text.lower() if joined_text is not None else None
        )

        full_text_data = data.get('fullTextAnnotation')
        self.full_text_annotation: Optional[OCRFullTextAnnotation] = (
            OCRFullTextAnnotation(full_text_data) if full_text_data else None
        )

        self.logo_annotations: List[LogoAnnotation] = [
            LogoAnnotation(raw) for raw in data.get('logoAnnotations', [])
        ]
        self.label_annotations: List[LabelAnnotation] = [
            LabelAnnotation(raw) for raw in data.get('labelAnnotations', [])
        ]

        self.safe_search_annotation: Optional[SafeSearchAnnotation] = (
            SafeSearchAnnotation(data['safeSearchAnnotation'])
            if 'safeSearchAnnotation' in data
            else None
        )
Example #22
0
def find_nutrition_image_nutrient_languages(
    mentions: JSONType,
) -> Dict[str, Dict[str, int]]:
    """Collect, per nutrient, the languages found in its matches.

    Each language is counted at most once per nutrient, however many
    matches mention it. Nutrients whose matches carry no language are left
    out of the result.

    :param mentions: mapping of nutrient names to lists of match mappings,
        each optionally holding a ``languages`` list
    :return: mapping of nutrient name to ``{language: count}``
    """
    counts: Dict[str, Dict[str, int]] = {}

    for nutrient, matches in mentions.items():
        counted: Set[str] = set()

        for match in matches:
            for lang in match.get("languages", []):
                if lang in counted:
                    continue

                counted.add(lang)
                per_nutrient = counts.setdefault(nutrient, {})
                per_nutrient[lang] = per_nutrient.get(lang, 0) + 1

    return counts
Example #23
0
def updated_product_predict_insights(barcode: str, product: JSONType,
                                     server_domain: str) -> bool:
    """Generate and import insights for an updated product.

    ``add_category_insight`` is always called first. When the product has a
    non-empty ``product_name``, predictions derived from that name are then
    imported with ``automatic=False``.

    :param barcode: barcode of the product
    :param product: raw product mapping
    :param server_domain: forwarded to the category step and the import
    :return: ``True`` if the import reported something, otherwise the
        result of ``add_category_insight``
    """
    category_updated = add_category_insight(barcode, product, server_domain)
    name = product.get("product_name")

    if not name:
        return category_updated

    logger.info("Generating predictions from product name...")
    predictions = get_predictions_from_product_name(barcode, name)
    imported = import_insights(predictions, server_domain, automatic=False)
    logger.info(f"{imported} insights imported for product {barcode}")

    return True if imported else category_updated
Example #24
0
def is_recent_image(product_images: JSONType, image_id: str,
                    max_timedelta: datetime.timedelta) -> bool:
    """Tell whether no other image is much newer than the target image.

    Only entries whose key consists of digits are considered; each must
    provide an ``uploaded_t`` value convertible to ``int``, which is read
    as a UTC timestamp.

    :param product_images: mapping of image keys to image metadata
    :param image_id: key of the target image
    :param max_timedelta: largest tolerated delay between the upload of the
        target image and the upload of any other image
    :return: ``True`` when there is no other image, or when no other image
        was uploaded more than ``max_timedelta`` after the target image;
        ``False`` otherwise
    :raises ValueError: if other images exist but ``image_id`` is not one
        of the digit keys
    """
    other_uploads = []
    target_upload: Optional[datetime.datetime] = None

    for key, image_meta in product_images.items():
        if not key.isdigit():
            continue

        # Naive UTC datetime, equivalent to the deprecated
        # ``utcfromtimestamp`` call.
        upload_datetime = datetime.datetime.fromtimestamp(
            int(image_meta["uploaded_t"]),
            tz=datetime.timezone.utc).replace(tzinfo=None)

        if key == image_id:
            target_upload = upload_datetime
        else:
            other_uploads.append(upload_datetime)

    # Checked before the "not found" case on purpose: with no other image
    # the answer is True even if the target itself is missing.
    if not other_uploads:
        logger.debug("No other images")
        return True

    if target_upload is None:
        raise ValueError("Image with ID {} not found".format(image_id))

    for upload_datetime in other_uploads:
        if upload_datetime - target_upload > max_timedelta:
            logger.debug("More recent image: {} > {}".format(
                upload_datetime, target_upload))
            return False

    sorted_datetimes = [
        str(day) for day in sorted({x.date() for x in other_uploads},
                                   reverse=True)
    ]
    logger.debug(
        "All images were uploaded the same day or before the target "
        "image:\n{} >= {}".format(target_upload.date(),
                                  ", ".join(sorted_datetimes)))
    return True
Example #25
0
def is_special_image(images: JSONType,
                     image_path: str,
                     image_type: str,
                     lang: Optional[str] = None) -> bool:
    """Tell whether an image is referenced by an entry of a given type.

    The image must first pass ``is_valid_image``. An entry matches when its
    key starts with ``image_type`` and its ``imgid`` (as a string) equals
    the stem of ``image_path``. When ``lang`` is given, the key must also
    end with ``_<lang>``.

    :param images: mapping of image keys to image metadata
    :param image_path: path of the image; its stem is the image ID
    :param image_type: key prefix to look for
    :param lang: optional language the key has to end with
    :return: ``True`` if a matching entry exists
    """
    if not is_valid_image(images, image_path):
        return False

    image_id = pathlib.Path(image_path).stem
    lang_suffix = None if lang is None else "_{}".format(lang)

    for image_key, image_data in images.items():
        if not image_key.startswith(image_type):
            continue

        if str(image_data.get("imgid")) != image_id:
            continue

        # Without a language any entry of the right type is enough.
        if lang_suffix is None or image_key.endswith(lang_suffix):
            return True

    return False
Example #26
0
    def from_json(cls, data: JSONType, **kwargs) -> Optional["OCRResult"]:
        """Create an ``OCRResult`` from the first response of a payload.

        :param data: mapping that may hold a ``responses`` list
        :param kwargs: forwarded unchanged to ``OCRResult``
        :return: the parsed result, or ``None`` when there is no response
            or when the first response contains an ``error`` key
        :raises OCRParsingException: if building the ``OCRResult`` fails;
            the original exception is chained as the cause
        """
        responses = data.get("responses", [])

        if not responses:
            return None

        # The emptiness check above guarantees a first element, so no
        # IndexError guard is needed here.
        response = responses[0]

        if "error" in response:
            logger.info("error in OCR response: " "{}".format(response["error"]))
            return None

        try:
            return OCRResult(response, **kwargs)
        except Exception as e:
            # Deliberately broad: any parsing failure is reported as one
            # exception type, with the cause preserved.
            raise OCRParsingException("error during OCR parsing") from e
Example #27
0
    def from_json(cls, data: JSONType) -> Optional['OCRResult']:
        """Create an ``OCRResult`` from the first response of a payload.

        :param data: mapping that may hold a ``responses`` list
        :return: the parsed result, or ``None`` when there is no response
            or when the first response contains an ``error`` key
        :raises OCRParsingException: if building the ``OCRResult`` fails;
            the original exception is chained as the cause
        """
        responses = data.get('responses', [])

        if not responses:
            return None

        # The emptiness check above guarantees a first element, so no
        # IndexError guard is needed here.
        response = responses[0]

        if 'error' in response:
            logger.info("error in OCR response: "
                        "{}".format(response['error']))
            return None

        try:
            return OCRResult(response)
        except Exception as e:
            # Deliberately broad: any parsing failure is reported as one
            # exception type, with the cause preserved.
            raise OCRParsingException("error during OCR parsing") from e
Example #28
0
def updated_product_add_category_insight(barcode: str,
                                         product: JSONType) -> bool:
    """Predict and import a category insight for an uncategorised product.

    Nothing is done when the product already has ``categories_tags`` or
    when ``predict_from_product`` returns ``None``.

    :param barcode: barcode of the product, used for logging
    :param product: raw product mapping
    :return: ``True`` if the importer reported imported insights
    """
    existing_categories = product.get('categories_tags', [])
    if existing_categories:
        return False

    predicted = predict_from_product(product)
    if predicted is None:
        return False

    store = CACHED_PRODUCT_STORE.get()
    importer = InsightImporterFactory.create(InsightType.category.name,
                                             store)
    was_imported = bool(importer.import_insights([predicted],
                                                 automatic=False))

    if was_imported:
        logger.info("Category insight imported for product {}".format(barcode))

    return was_imported
Example #29
0
 def __init__(self, data: JSONType):
     """Read the identifier, score and description of an annotation.

     :param data: mapping where ``score`` and ``description`` are required
         and ``mid`` is optional
     """
     mid = data.get("mid")
     # A missing or empty identifier is stored as None.
     self.id = mid if mid else None
     self.score = data["score"]
     self.description = data["description"]
Example #30
0
 def __init__(self, data: JSONType):
     """Read the locale, text and bounding polygon of a text annotation.

     :param data: mapping where ``description`` and ``boundingPoly`` are
         required and ``locale`` is optional
     """
     # The locale is the only optional field; it defaults to None.
     self.locale = data.get("locale")
     self.text = data["description"]
     raw_polygon = data["boundingPoly"]
     self.bounding_poly = BoundingPoly(raw_polygon)