Beispiel #1
0
def flag_image(ocr_result: OCRResult) -> List[Dict]:
    """Build flagging insights from safe-search and label annotations.

    Two sources are inspected:

    * the safe-search annotation (when present): the ``adult`` and
      ``violence`` likelihoods are reported when they compare
      ``>= SafeSearchAnnotationLikelihood.VERY_LIKELY``;
    * the label annotations: only the first label whose description is
      ``Face``, ``Head`` or ``Selfie`` with a score of at least 0.8 is
      reported.

    Args:
        ocr_result: object providing ``get_safe_search_annotation()`` and
            ``get_label_annotations()``.

    Returns:
        A list of ``{'type': ..., 'likelihood': ...}`` dicts. For
        safe-search entries the likelihood is the enum member name; for
        the label entry it is the label score.
    """
    safe_search = ocr_result.get_safe_search_annotation()
    labels = ocr_result.get_label_annotations()
    flags: List[Dict] = []

    if safe_search:
        threshold = SafeSearchAnnotationLikelihood.VERY_LIKELY
        for category in ('adult', 'violence'):
            likelihood = getattr(safe_search, category)
            if likelihood >= threshold:
                flags.append({
                    'type': category,
                    'likelihood': likelihood.name,
                })

    # At most one label-based flag is emitted: the first qualifying label.
    flagged_label = next(
        (
            label for label in labels
            if label.description in ('Face', 'Head', 'Selfie')
            and label.score >= 0.8
        ),
        None,
    )
    if flagged_label is not None:
        flags.append({
            'type': flagged_label.description.lower(),
            'likelihood': flagged_label.score
        })

    return flags
Beispiel #2
0
def find_expiration_date(ocr_result: OCRResult,
                         *,
                         min_year: int = 2015,
                         max_year: int = 2025) -> List[Dict]:
    """Extract expiration dates from the OCR text.

    Each pattern of ``EXPIRATION_DATE_REGEX`` is searched in the text
    returned by ``ocr_result.get_text`` for that pattern. Every match is
    converted with the pattern's ``processing_func``; a match is dropped
    when the function returns ``None`` or when the year of the returned
    date falls outside ``[min_year, max_year]``. Patterns without a
    ``processing_func`` produce no result.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.
        min_year: smallest accepted year (inclusive). Defaults to the
            previously hard-coded lower bound.
        max_year: largest accepted year (inclusive). Defaults to the
            previously hard-coded upper bound.

    Returns:
        A list of dicts with the keys ``raw`` (matched text), ``text``
        (date formatted as ``%d/%m/%Y``), ``type`` (pattern key) and
        ``notify`` (taken from the pattern).
    """
    results = []

    for type_, ocr_regex in EXPIRATION_DATE_REGEX.items():
        text = ocr_result.get_text(ocr_regex)

        if not text:
            continue

        # Without a processing function no match can be turned into a
        # date, so there is no point in scanning the text at all.
        processing_func = ocr_regex.processing_func
        if not processing_func:
            continue

        for match in ocr_regex.regex.finditer(text):
            date = processing_func(match)

            if date is None:
                continue

            # Discard implausible years.
            if not min_year <= date.year <= max_year:
                continue

            results.append({
                "raw": match.group(0),
                "text": date.strftime("%d/%m/%Y"),
                "type": type_,
                "notify": ocr_regex.notify,
            })

    return results
Beispiel #3
0
def find_labels(ocr_result: OCRResult) -> List[Dict]:
    """Detect product labels from the OCR text and the logo annotations.

    Text detection: every pattern listed in ``LABELS_REGEX`` is searched
    in the text returned by ``ocr_result.get_text`` for that pattern. The
    reported label is the value returned by the pattern's
    ``processing_func`` when it has one, otherwise the ``LABELS_REGEX``
    key the pattern is registered under.

    Logo detection: every logo annotation whose description is a key of
    ``LOGO_ANNOTATION_LABELS`` yields the mapped label.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)`` and a
            ``logo_annotations`` iterable.

    Returns:
        A list of dicts. Text-based entries carry ``label_tag``, ``text``
        and ``notify``; logo-based entries carry ``label_tag``,
        ``automatic_processing``, ``confidence``, ``model`` and
        ``notify``.
    """
    results = []

    for label_tag, regex_list in LABELS_REGEX.items():
        for ocr_regex in regex_list:
            text = ocr_result.get_text(ocr_regex)

            if not text:
                continue

            for match in ocr_regex.regex.finditer(text):
                if ocr_regex.processing_func:
                    label_value = ocr_regex.processing_func(match)
                else:
                    label_value = label_tag

                results.append({
                    'label_tag': label_value,
                    'text': match.group(),
                    'notify': ocr_regex.notify,
                })

    for logo_annotation in ocr_result.logo_annotations:
        if logo_annotation.description in LOGO_ANNOTATION_LABELS:
            label_tag = LOGO_ANNOTATION_LABELS[logo_annotation.description]

            results.append({
                'label_tag': label_tag,
                'automatic_processing': False,
                'confidence': logo_annotation.score,
                'model': 'google-cloud-vision',
                # Every entry exposes 'notify', so consumers can read the
                # key without special-casing logo-based results.
                'notify': False,
            })

    return results
Beispiel #4
0
def find_nutrient_values(ocr_result: OCRResult) -> List[Dict]:
    """Collect nutrient mentions from the OCR text, grouped by nutrient.

    Each pattern of ``NUTRIENT_VALUES_REGEX`` is searched in the text
    returned by ``ocr_result.get_text`` for that pattern. Group 2 of a
    match is taken as the value (commas replaced by dots) and group 3 as
    the unit.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.

    Returns:
        An empty list when nothing matched; otherwise a single-element
        list ``[{'nutrients': {...}, 'notify': False}]`` where
        ``nutrients`` maps each pattern key to the list of its matches.
    """
    found: JSONType = {}

    for nutrient_name, ocr_regex in NUTRIENT_VALUES_REGEX.items():
        text = ocr_result.get_text(ocr_regex)

        if text:
            for hit in ocr_regex.regex.finditer(text):
                found.setdefault(nutrient_name, []).append({
                    "raw": hit.group(0),
                    "nutrient": nutrient_name,
                    'value': hit.group(2).replace(',', '.'),
                    'unit': hit.group(3),
                })

    return [{'nutrients': found, 'notify': False}] if found else []
Beispiel #5
0
def find_image_orientation(ocr_result: OCRResult) -> List[Dict]:
    """Report the image orientation when it is known and not upright.

    Args:
        ocr_result: object providing ``get_orientation()``.

    Returns:
        ``[]`` when no orientation is available or when the orientation
        equals ``ImageOrientation.up``; otherwise a single-element list
        holding the orientation result's ``to_json()`` output.
    """
    detected = ocr_result.get_orientation()

    if detected is None:
        return []

    if detected.orientation == ImageOrientation.up:
        return []

    return [detected.to_json()]
Beispiel #6
0
    def get_text(ocr_result: OCRResult) -> str:
        """Extract text from the OCR result and prepare it.

        The full text is preferred. When it is unavailable, the text of
        the first text annotation is used instead; if there is no text
        annotation either, an empty string is returned.

        Args:
            ocr_result (OCRResult): The OCR result to process.

        Returns:
            str: The text extracted and prepared, possibly empty.
        """
        text = ocr_result.get_full_text()
        if text is not None:
            return text

        # Using `OCRResult.text_annotations` directly instead of
        # `OCRResult.get_text_annotations()` because the latter contains
        # the text duplicated
        annotations = ocr_result.text_annotations
        if not annotations:
            # Nothing was recognised at all: report "no text" rather than
            # failing on the missing first annotation.
            return ""
        return annotations[0].text
Beispiel #7
0
def find_packager_codes(ocr_result: OCRResult) -> List[Dict]:
    """Extract packager codes from the OCR text.

    Each pattern of ``PACKAGER_CODE`` is searched in the text returned by
    ``ocr_result.get_text`` for that pattern. Patterns whose
    ``processing_func`` is ``None`` contribute nothing.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.

    Returns:
        A list of dicts with the keys ``raw`` (matched text), ``text``
        (output of the pattern's ``processing_func``), ``type`` (pattern
        key) and ``notify`` (taken from the pattern).
    """
    codes = []

    for code_type, ocr_regex in PACKAGER_CODE.items():
        text = ocr_result.get_text(ocr_regex)
        if not text:
            continue

        for hit in ocr_regex.regex.finditer(text):
            normalize = ocr_regex.processing_func
            if normalize is None:
                continue

            normalized = normalize(hit)
            codes.append({
                "raw": hit.group(0),
                "text": normalized,
                "type": code_type,
                "notify": ocr_regex.notify,
            })

    return codes
Beispiel #8
0
def find_product_weight(ocr_result: OCRResult) -> List[Dict]:
    """Extract product weight mentions from the OCR text.

    Each pattern of ``PRODUCT_WEIGHT_REGEX`` is searched in the text
    returned by ``ocr_result.get_text`` for that pattern. The pattern's
    ``processing_func`` builds the result for a match; patterns whose
    ``processing_func`` is ``None`` contribute nothing.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.

    Returns:
        The list of results produced by the processing functions, each
        one extended with ``matcher_type`` (pattern key), ``priority``
        and ``notify`` (both taken from the pattern).
    """
    weights = []

    for matcher_type, ocr_regex in PRODUCT_WEIGHT_REGEX.items():
        text = ocr_result.get_text(ocr_regex)

        if text:
            for hit in ocr_regex.regex.finditer(text):
                if ocr_regex.processing_func is not None:
                    weight = ocr_regex.processing_func(hit)
                    # Tag the result with the metadata of the pattern
                    # that produced it.
                    weight['matcher_type'] = matcher_type
                    weight['priority'] = ocr_regex.priority
                    weight['notify'] = ocr_regex.notify
                    weights.append(weight)

    return weights
Beispiel #9
0
def find_traces(ocr_result: OCRResult) -> List[Dict]:
    """Locate trace mentions in the OCR text.

    ``TRACES_REGEX`` is searched in the text returned by
    ``ocr_result.get_text(TRACES_REGEX)``. For every match, up to 100
    characters following the end of the match are captured.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.

    Returns:
        A list of dicts with the keys ``raw`` (matched text), ``text``
        (the captured characters after the match) and ``notify`` (taken
        from ``TRACES_REGEX``); empty when there is no text.
    """
    text = ocr_result.get_text(TRACES_REGEX)

    if not text:
        return []

    return [
        {
            'raw': hit.group(),
            'text': text[hit.end():hit.end() + 100],
            'notify': TRACES_REGEX.notify,
        }
        for hit in TRACES_REGEX.regex.finditer(text)
    ]
Beispiel #10
0
def find_nutrient_values(ocr_result: OCRResult) -> List[Dict]:
    """Collect nutrient mentions from the OCR text as a flat list.

    Each pattern of ``NUTRIENT_VALUES_REGEX`` is searched in the text
    returned by ``ocr_result.get_text`` for that pattern. Group 2 of a
    match is taken as the value (commas replaced by dots) and group 3 as
    the unit.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.

    Returns:
        One dict per match with the keys ``raw``, ``nutrient`` (pattern
        key), ``value``, ``unit`` and ``notify`` (taken from the
        pattern).
    """
    extracted = []

    for nutrient_name, ocr_regex in NUTRIENT_VALUES_REGEX.items():
        text = ocr_result.get_text(ocr_regex)

        if text:
            extracted.extend(
                {
                    "raw": hit.group(0),
                    "nutrient": nutrient_name,
                    'value': hit.group(2).replace(',', '.'),
                    'unit': hit.group(3),
                    'notify': ocr_regex.notify,
                }
                for hit in ocr_regex.regex.finditer(text)
            )

    return extracted
Beispiel #11
0
def find_stores(ocr_result: OCRResult) -> List[Dict]:
    """Detect store names in the OCR text.

    ``STORE_REGEX`` is searched in the text returned by
    ``ocr_result.get_text(STORE_REGEX)``. For every match, the index of
    the first capturing group that participated in the match selects the
    store in ``SORTED_STORES``.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)``.

    Returns:
        A list of dicts with the keys ``store``, ``store_tag``, ``text``
        (the captured text) and ``notify`` (whether the store is in
        ``NOTIFY_STORES``); empty when there is no text.
    """
    text = ocr_result.get_text(STORE_REGEX)

    if not text:
        return []

    stores = []

    for hit in STORE_REGEX.regex.finditer(text):
        # Only the first non-empty group of a match is considered.
        first_group = next(
            (
                (position, captured)
                for position, captured in enumerate(hit.groups())
                if captured is not None
            ),
            None,
        )
        if first_group is None:
            continue

        position, captured = first_group
        store, _ = SORTED_STORES[position]
        stores.append({
            'store': store,
            'store_tag': get_store_tag(store),
            'text': captured,
            'notify': store in NOTIFY_STORES,
        })

    return stores
Beispiel #12
0
def find_brands(ocr_result: OCRResult) -> List[Dict]:
    """Detect at most one brand from the OCR text or the logo annotations.

    ``BRAND_REGEX`` is searched in the text returned by
    ``ocr_result.get_text(BRAND_REGEX)``; the first capturing group that
    participated in a match selects the brand in ``SORTED_BRANDS`` and the
    function returns immediately. Only when the text yields no brand are
    the logo annotations inspected, and the first one whose description
    is a key of ``LOGO_ANNOTATION_BRANDS`` is returned.

    Args:
        ocr_result: object providing ``get_text(ocr_regex)`` and a
            ``logo_annotations`` iterable.

    Returns:
        An empty list when there is no text or nothing was detected;
        otherwise a single-element list. A text-based entry carries
        ``brand``, ``brand_tag``, ``text`` and ``notify``; a logo-based
        entry carries ``brand``, ``brand_tag``, ``automatic_processing``,
        ``confidence``, ``model`` and ``notify``.
    """
    text = ocr_result.get_text(BRAND_REGEX)

    if not text:
        return []

    for hit in BRAND_REGEX.regex.finditer(text):
        for position, captured in enumerate(hit.groups()):
            if captured is None:
                continue

            brand, _ = SORTED_BRANDS[position]
            # The first brand found in the text wins.
            return [{
                'brand': brand,
                'brand_tag': get_brand_tag(brand),
                'text': captured,
                'notify': brand in NOTIFY_BRANDS,
            }]

    for logo in ocr_result.logo_annotations:
        if logo.description not in LOGO_ANNOTATION_BRANDS:
            continue

        brand = LOGO_ANNOTATION_BRANDS[logo.description]
        return [{
            'brand': brand,
            'brand_tag': get_brand_tag(brand),
            'automatic_processing': False,
            'confidence': logo.score,
            'model': 'google-cloud-vision',
            'notify': False,
        }]

    return []
Beispiel #13
0
def test_ocr_result_extraction_non_regression(ocr_name: str):
    """Check that a stored OCR JSON file can still be parsed.

    Args:
        ocr_name: name of the JSON file, relative to ``data_dir``.
    """
    # JSON is UTF-8 encoded; decode it explicitly so that the test does
    # not depend on the locale's default encoding.
    with (data_dir / ocr_name).open("r", encoding="utf-8") as f:
        data = json.load(f)

    result = OCRResult.from_json(data)
    assert result