Example #1
0
def get_json_for_image(barcode: str, image_name: str) -> \
        Optional[JSONType]:
    """Fetch the JSON document located at the OCR URL of an image.

    The URL is built by ``generate_json_ocr_url(barcode, image_name)`` and
    requested with ``http_session``.

    :param barcode: barcode forwarded to ``generate_json_ocr_url``
    :param image_name: image name forwarded to ``generate_json_ocr_url``
    :return: ``None`` when the server answers 404, otherwise the decoded
        JSON body of the response
    """
    response = http_session.get(generate_json_ocr_url(barcode, image_name))
    # Only a 404 is treated as "no data"; any other status falls through to
    # JSON decoding.
    return None if response.status_code == 404 else response.json()
Example #2
0
def download_dataset(output_path: os.PathLike) -> str:
    """Stream the dataset at ``settings.JSONL_DATASET_URL`` into a file.

    :param output_path: path of the file to (over)write with the raw
        response body
    :return: the value of the response ``ETag`` header with surrounding
        single/double quotes stripped, or ``""`` when the header is absent
    """
    r = http_session.get(settings.JSONL_DATASET_URL, stream=True)

    try:
        current_etag = r.headers.get("ETag", "").strip("'\"")

        logger.info("Dataset has changed, downloading file")
        logger.debug("Saving temporary file in {}".format(output_path))

        with open(output_path, "wb") as f:
            # Copy from the underlying raw stream so the whole body is never
            # held in memory at once.
            shutil.copyfileobj(r.raw, f)
    finally:
        # A streamed response holds on to its connection until it is fully
        # consumed or closed; close it explicitly so the connection is
        # released even if writing the file fails part-way.
        r.close()

    return current_etag
Example #3
0
def fetch_taxonomy(url: str, fallback_path: str, offline=False) \
        -> Optional[Taxonomy]:
    """Load a taxonomy from ``url``, falling back to a local JSON file.

    :param url: location requested with ``http_session`` (5 second timeout)
    :param fallback_path: path handed to ``Taxonomy.from_json`` when running
        offline, or when the download/JSON decoding raises
    :param offline: when true, skip the network and read ``fallback_path``
    :return: the taxonomy built from the downloaded data or from the
        fallback file; ``None`` if the download failed and ``fallback_path``
        is falsy
    """
    if not offline:
        try:
            response = http_session.get(url, timeout=5)
            payload = response.json()
        except Exception:
            # Best effort: any failure while fetching or decoding falls back
            # to the local copy when one was provided.
            return Taxonomy.from_json(fallback_path) if fallback_path else None

        return Taxonomy.from_dict(payload)

    return Taxonomy.from_json(fallback_path)
Example #4
0
def save_image(directory: pathlib.Path,
               image_meta: JSONType,
               barcode: str,
               override: bool = False):
    """Download a product image and store it as ``<barcode>_<imgid>.jpg``.

    :param directory: directory in which the image file is written
    :param image_meta: mapping whose ``'imgid'`` entry names the image
    :param barcode: barcode used in both the file name and the image URL
    :param override: when false (default), an already existing file is left
        untouched and nothing is downloaded
    :return: ``None``
    """
    image_name = image_meta['imgid']
    image_full_name = "{}_{}.jpg".format(barcode, image_name)
    image_path = directory / image_full_name

    if image_path.exists() and not override:
        return

    image_url = generate_image_url(barcode, image_name)
    logger.info("Downloading image {}".format(image_url))
    r = http_session.get(image_url)

    if r.status_code >= 400:
        # Never persist an error body as a .jpg: because existing files are
        # skipped above, a bad file would otherwise never be re-downloaded
        # (and with override=True it would clobber a good copy).
        logger.warning("Could not download image {} (HTTP {})".format(
            image_url, r.status_code))
        return

    with open(str(image_path), 'wb') as fd:
        logger.info("Saving image in {}".format(image_path))
        # 8 KiB chunks avoid thousands of tiny writes per image.
        for chunk in r.iter_content(chunk_size=8192):
            fd.write(chunk)
Example #5
0
def extract_ocr_insights(ocr_url: str,
                         insight_types: Iterable[str]) -> JSONType:
    """Download an OCR JSON document and extract the requested insights.

    :param ocr_url: URL of the OCR JSON, requested with ``http_session``
    :param insight_types: insight type names, each passed in turn to
        ``ocr.extract_insights``
    :return: mapping of insight type to its extracted insights, containing
        only the types for which the extraction result was truthy; an empty
        dict when ``ocr.OCRResult.from_json`` returns ``None``
    :raises: whatever ``raise_for_status()`` raises on an error response
    """
    response = http_session.get(ocr_url)
    response.raise_for_status()

    payload: Dict = response.json()
    parsed = ocr.OCRResult.from_json(payload)

    if parsed is None:
        logger.info("Error during OCR extraction: {}".format(ocr_url))
        return {}

    # Lazily pair each requested type with its extraction result, then keep
    # only the non-empty ones.
    extracted = ((kind, ocr.extract_insights(parsed, kind))
                 for kind in insight_types)
    return {kind: found for kind, found in extracted if found}
Example #6
0
def fetch_images_for_ean(ean: str):
    """Return the decoded JSON of the product API response for ``ean``.

    The request asks the Open Food Facts v0 product endpoint for the
    ``images`` field only.

    :param ean: code inserted into the product URL
    :return: the decoded JSON body of the response
    """
    template = ("https://world.openfoodfacts.org/api/v0/product/"
                "{}.json?fields=images")
    response = http_session.get(template.format(ean))
    return response.json()