Esempio n. 1
0
def save_nearest_neighbors(logos: List[LogoAnnotation]) -> int:
    """Fetch nearest-neighbor results for `logos` from the Robotoff ANN
    batch endpoint and persist them on each logo annotation.

    :param logos: the logo annotations to update
    :return: the number of logos successfully updated and saved
    """
    logo_ids_params = ",".join(str(logo.id) for logo in logos)
    r = http_session.get(
        f"https://robotoff.openfoodfacts.org/api/v1/ann/batch?logo_ids={logo_ids_params}",
        timeout=30,
    )
    # Fail fast on HTTP errors instead of trying to parse an error body as JSON
    r.raise_for_status()

    response = r.json()
    # JSON object keys are strings; convert them back to int logo IDs
    results = {int(key): value for key, value in response["results"].items()}

    logo_id_to_logo = {logo.id: logo for logo in logos}
    missing_logo_ids = set(logo_id_to_logo.keys()).difference(results.keys())

    if missing_logo_ids:
        # lazy %-formatting: only rendered if the message is actually emitted
        logger.warning("Missing logo IDs in response: %s", missing_logo_ids)

    saved = 0
    for logo_id, logo_results in results.items():
        if logo_id in logo_id_to_logo:
            logo = logo_id_to_logo[logo_id]
            distances = [n["distance"] for n in logo_results]
            logo_ids = [n["logo_id"] for n in logo_results]
            logo.nearest_neighbors = {
                "distances": distances,
                "logo_ids": logo_ids,
            }
            logo.save()
            saved += 1

    return saved
Esempio n. 2
0
def fetch_images_for_ean(ean: str):
    """Return the raw JSON response (restricted to the `images` field) of the
    Open Food Facts product API for the given EAN barcode."""
    url = f"https://world.openfoodfacts.org/api/v0/product/{ean}.json?fields=images"
    return http_session.get(url).json()
Esempio n. 3
0
def iter_products(country: str, ingredient: str):
    """Yield all products (code + localized ingredient text) matching
    `ingredient` for `country`, following the API's pagination.

    :param country: country code, used for the base URL and ingredient field
    :param ingredient: the ingredient to search for
    """
    ingredient_field = f"ingredients_text_{country}"
    base_url = BaseURLProvider().country(country).get() + "/ingredient"
    url = base_url + f"/{ingredient}/1.json?fields=code,{ingredient_field}"
    r = http_session.get(url)
    data = r.json()
    count = data["count"]
    page_size = data["page_size"]
    yield from data["products"]

    # ceil(count / page_size) total pages
    pages = count // page_size + int(count % page_size != 0)
    # bug fix: range(2, pages) skipped the final page; range must go to pages + 1
    for page in range(2, pages + 1):
        url = base_url + f"/{ingredient}/{page}.json?fields=code,{ingredient_field}"
        r = http_session.get(url)
        data = r.json()
        yield from data["products"]
Esempio n. 4
0
def iter_products(country: str, ingredient: str):
    """Yield all products (code + localized ingredient text) matching
    `ingredient` on the given country's subdomain, following pagination.

    :param country: country subdomain, also selects the ingredient field
    :param ingredient: the ingredient to search for
    """
    ingredient_field = f"ingredients_text_{country}"
    base_url = f"https://{country}.openfoodfacts.org/ingredient"
    url = base_url + f"/{ingredient}/1.json?fields=code,{ingredient_field}"
    r = http_session.get(url)
    data = r.json()
    count = data["count"]
    page_size = data["page_size"]
    yield from data["products"]

    # ceil(count / page_size) total pages
    pages = count // page_size + int(count % page_size != 0)
    # bug fix: range(2, pages) skipped the final page; range must go to pages + 1
    for page in range(2, pages + 1):
        url = base_url + f"/{ingredient}/{page}.json?fields=code,{ingredient_field}"
        r = http_session.get(url)
        data = r.json()
        yield from data["products"]
Esempio n. 5
0
def save_nearest_neighbors(logos: List[LogoAnnotation]) -> int:
    """Query the Robotoff ANN batch endpoint for the nearest neighbors of
    every logo in `logos`, store the results on each logo and save it.

    :param logos: the logo annotations to update
    :return: how many logos were updated and saved
    """
    ids_param = ",".join(str(logo.id) for logo in logos)
    r = http_session.get(
        settings.BaseURLProvider().robotoff().get()
        + "/api/v1/ann/batch?logo_ids="
        + ids_param,
        timeout=30,
    )

    r.raise_for_status()

    # JSON keys arrive as strings; map them back to integer logo IDs
    raw_results = r.json()["results"]
    results = {int(logo_id): neighbors for logo_id, neighbors in raw_results.items()}

    by_id = {logo.id: logo for logo in logos}
    missing = set(by_id.keys()) - set(results.keys())

    if missing:
        logger.warning("Missing logo IDs in response: %s", missing)

    saved = 0
    for logo_id, neighbors in results.items():
        logo = by_id.get(logo_id)
        if logo is None:
            continue
        logo.nearest_neighbors = {
            "distances": [n["distance"] for n in neighbors],
            "logo_ids": [n["logo_id"] for n in neighbors],
        }
        logo.save()
        saved += 1

    return saved
Esempio n. 6
0
def get_product(
    barcode: str,
    fields: Optional[List[str]] = None,  # annotation fix: default is None
    server: Optional[Union[ServerType, str]] = None,
    timeout: Optional[int] = 10,
) -> Optional[Dict]:
    """Fetch a product from the Open Food Facts API.

    :param barcode: the product barcode
    :param fields: optional list of fields to restrict the response to
    :param server: the target server; defaults to ServerType.off
    :param timeout: request timeout in seconds
    :return: the product dict, or None when the request fails or the
        product is not found
    """
    fields = fields or []

    if server is None:
        server = ServerType.off

    url = get_api_product_url(server) + "/{}.json".format(barcode)

    if fields:
        # requests escapes commas in URLs, as expected, but the openfoodfacts
        # server does not recognize escaped commas, so build the query by hand.
        # See https://github.com/openfoodfacts/openfoodfacts-server/issues/1607
        url += "?fields={}".format(",".join(fields))

    r = http_session.get(url, timeout=timeout)

    if r.status_code != 200:
        return None

    data = r.json()

    if data["status_verbose"] != "product found":
        return None

    return data["product"]
Esempio n. 7
0
def get_json_for_image(barcode: str, image_id: str) -> Optional[JSONType]:
    """Fetch the OCR JSON of a product image; None when the server has no
    JSON for it (HTTP 404)."""
    response = http_session.get(generate_json_ocr_url(barcode, image_id))
    return None if response.status_code == 404 else response.json()
Esempio n. 8
0
def download_dataset(output_path: os.PathLike) -> str:
    """Download the JSONL dataset to `output_path` and return its ETag.

    :param output_path: destination path for the downloaded file
    :return: the ETag header value with surrounding quotes stripped
        (empty string when the header is absent)
    """
    # Stream the download so the whole dataset is never held in memory;
    # the `with` block guarantees the connection is released (the original
    # leaked the streamed response).
    with http_session.get(settings.JSONL_DATASET_URL, stream=True) as r:
        current_etag = r.headers.get("ETag", "").strip("'\"")

        logger.info("Dataset has changed, downloading file")
        logger.debug("Saving temporary file in %s", output_path)

        with open(output_path, "wb") as f:
            # r.raw is the undecoded byte stream, copied in chunks
            shutil.copyfileobj(r.raw, f)

    return current_etag
Esempio n. 9
0
def get_stored_logo_ids() -> Set[int]:
    """Return the set of logo IDs already stored in the Robotoff ANN index,
    or an empty set when the request fails."""
    response = http_session.get(
        "https://robotoff.openfoodfacts.org/api/v1/ann/stored",
        timeout=30,
    )

    if not response.ok:
        logger.warning(
            f"error while fetching stored logo IDs ({response.status_code}): {response.text}"
        )
        return set()

    return set(response.json()["stored"])
Esempio n. 10
0
def get_stored_logo_ids() -> Set[int]:
    """Return the set of logo IDs already stored in the Robotoff ANN index.

    :return: the stored logo IDs, or an empty set when the request failed
    """
    r = http_session.get(
        settings.BaseURLProvider().robotoff().get() + "/api/v1/ann/stored",
        timeout=30,
    )

    if not r.ok:
        # bug fix: the original mixed an f-string with a %s lazy-logging
        # placeholder in the same call; use pure %-formatting for both values
        logger.warning(
            "error while fetching stored logo IDs (%s): %s", r.status_code, r.text
        )
        return set()

    return set(r.json()["stored"])
Esempio n. 11
0
def move_to(barcode: str, to: ServerType, timeout: Optional[int] = 10) -> bool:
    """Move a product to another server via the product edit endpoint.

    Returns False when the product already exists on the target server,
    otherwise True iff the server reports a successful edit (status == 1).
    """
    if get_product(barcode, server=to) is not None:
        return False

    response = http_session.get(
        "{}/cgi/product_jqm.pl".format(settings.OFF_BASE_WEBSITE_URL),
        params={
            "type": "edit",
            "code": barcode,
            "new_code": to,
            **AUTH_DICT,
        },
        timeout=timeout,
    )
    return response.json()["status"] == 1
Esempio n. 12
0
def move_to(barcode: str, to: ServerType, timeout: Optional[int] = 10) -> bool:
    """Move a product to the server designated by `to`.

    Returns False when the product already exists on the target server,
    otherwise True iff the edit request reports success (status == 1).
    """
    if get_product(barcode, server=to) is not None:
        return False

    endpoint = "{}/cgi/product_jqm.pl".format(settings.BaseURLProvider().get())
    payload = {
        "type": "edit",
        "code": barcode,
        "new_code": str(to),
        **settings.off_credentials(),
    }
    response = http_session.get(endpoint, params=payload, timeout=timeout)
    return response.json()["status"] == 1
Esempio n. 13
0
def update_product(
    params: Dict,
    server_domain: Optional[str] = None,
    auth: Optional[OFFAuthentication] = None,
    timeout: Optional[int] = 15,
):
    """Send a product update to the Open Food Facts server.

    :param params: the update fields to send; NOTE: mutated in place
        (credentials and an amended comment may be added)
    :param server_domain: target server domain; defaults to
        settings.OFF_SERVER_DOMAIN
    :param auth: user authentication (session cookie or username/password);
        when None, the global robotoff credentials are used
    :param timeout: request timeout in seconds
    :raises ValueError: when neither a password nor a session cookie is
        available
    :raises requests.HTTPError: on a non-2xx response
    """
    if server_domain is None:
        server_domain = settings.OFF_SERVER_DOMAIN

    url = get_product_update_url(server_domain)

    # capture the caller's comment before credentials are merged in
    comment = params.get("comment")
    cookies = None

    if auth is not None:
        # prefer cookie authentication when a session cookie is available
        if auth.session_cookie:
            cookies = {
                "session": auth.session_cookie,
            }
        elif auth.username:
            params["user_id"] = auth.username
            params["password"] = auth.password
    else:
        # no user auth: fall back to the global robotoff account and tag the
        # edit as automated
        params.update(settings.off_credentials())

        if comment:
            params["comment"] = comment + " (automated edit)"

    if cookies is None and not params.get("password"):
        raise ValueError(
            "a password or a session cookie is required to update a product"
        )

    request_auth: Optional[Tuple[str, str]] = None
    if server_domain.endswith("openfoodfacts.net"):
        # dev environment requires authentication
        request_auth = ("off", "off")

    r = http_session.get(
        url, params=params, auth=request_auth, cookies=cookies, timeout=timeout
    )

    r.raise_for_status()
    json = r.json()

    status = json.get("status_verbose")

    if status != "fields saved":
        logger.warning(f"Unexpected status during product update: {status}")
Esempio n. 14
0
def fetch_taxonomy(url: str, fallback_path: str, offline=False) -> Optional[Taxonomy]:
    """Fetch a taxonomy from `url`, falling back to a local JSON dump.

    :param url: the taxonomy JSON URL
    :param fallback_path: local JSON file used when offline or on error
    :param offline: when True, skip the network and load the fallback
    :return: the taxonomy, or None when fetching failed and no fallback
        path was given
    """
    if offline:
        return Taxonomy.from_json(fallback_path)

    try:
        r = http_session.get(url, timeout=5)
        data = r.json()
    except Exception:
        # bug fix: the previous message claimed "Timeout" for *any* failure
        # (including JSON decode errors); log the actual exception instead
        logger.exception("error while fetching taxonomy at %s", url)
        if fallback_path:
            return Taxonomy.from_json(fallback_path)
        else:
            return None

    return Taxonomy.from_dict(data)
Esempio n. 15
0
def get_random_insight(insight_type: Optional[str] = None,
                       country: Optional[str] = None) -> JSONType:
    """Fetch one random insight, optionally filtered by type and country.

    :raises NoInsightException: when no insight matches the filters
    """
    # only include the filters that were actually provided
    query = {
        key: value
        for key, value in (("type", insight_type), ("country", country))
        if value
    }

    data = http_session.get(RANDOM_INSIGHT_URL, params=query).json()

    if data["status"] == "no_insights":
        raise NoInsightException()

    return data["insight"]
Esempio n. 16
0
def save_image(directory: pathlib.Path,
               image_meta: JSONType,
               barcode: str,
               override: bool = False):
    """Download a product image and save it as `<barcode>_<imgid>.jpg`.

    :param directory: destination directory
    :param image_meta: image metadata; only the "imgid" key is read
    :param barcode: product barcode, used in the file name and the URL
    :param override: when True, re-download even if the file already exists
    """
    image_id = image_meta["imgid"]
    image_name = "{}_{}.jpg".format(barcode, image_id)
    image_path = directory / image_name

    if image_path.exists() and not override:
        return

    image_url = generate_image_url(barcode, image_id)
    logger.info("Downloading image %s", image_url)
    r = http_session.get(image_url)

    # bug fix: don't save an HTML error page as a .jpg on a failed request
    if not r.ok:
        logger.warning(
            "Could not download image %s (HTTP %s)", image_url, r.status_code
        )
        return

    with open(str(image_path), "wb") as fd:
        logger.info("Saving image in %s", image_path)
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)
Esempio n. 17
0
def fetch_taxonomy(url: str, fallback_path: str, offline=False) -> Optional[Taxonomy]:
    """Fetch a taxonomy from `url`, falling back to a local JSON dump.

    :param url: the taxonomy JSON URL
    :param fallback_path: local JSON file used when offline or on error
    :param offline: when True, skip the network and load the fallback
    :return: the taxonomy, or None when fetching failed and no fallback
        path was given
    """
    if offline:
        return Taxonomy.from_json(fallback_path)

    try:
        r = http_session.get(url, timeout=120)  # might take some time
        if r.status_code >= 300:
            # bug fix: the message mixed a %s placeholder with a plain
            # (non-f) "{r.status_code}", so nothing was ever interpolated;
            # exceptions don't do lazy %-formatting, use an f-string
            raise requests.HTTPError(
                f"Taxonomy download at {url} returned status code {r.status_code}"
            )
        data = r.json()
    except Exception as e:
        # bug fix: f-string and %s placeholder were mixed in one log call;
        # use pure lazy %-formatting
        logger.exception("%s exception while fetching taxonomy at %s", type(e), url)
        if fallback_path:
            return Taxonomy.from_json(fallback_path)
        else:
            return None

    return Taxonomy.from_dict(data)
Esempio n. 18
0
def get_ocr_result(ocr_url: str) -> Optional[ocr.OCRResult]:
    """Download OCR data from `ocr_url` and parse it into an OCRResult.

    :raises requests.HTTPError: on a non-2xx response
    """
    response = http_session.get(ocr_url)
    response.raise_for_status()
    return ocr.OCRResult.from_json(response.json())
Esempio n. 19
0
def fetch_images_for_ean(ean: str):
    """Return the raw JSON response (restricted to the `images` field) of the
    product API for the given EAN barcode."""
    url = "{}/api/v0/product/{}.json?fields=images".format(
        BaseURLProvider().get(), ean
    )
    return http_session.get(url).json()