def save_nearest_neighbors(logos: List[LogoAnnotation]) -> int:
    logo_ids_params = ",".join(str(logo.id) for logo in logos)
    r = http_session.get(
        f"https://robotoff.openfoodfacts.org/api/v1/ann/batch?logo_ids={logo_ids_params}",
        timeout=30,
    )
    response = r.json()
    results = {int(key): value for key, value in response["results"].items()}

    logo_id_to_logo = {logo.id: logo for logo in logos}
    missing_logo_ids = set(logo_id_to_logo.keys()).difference(set(results.keys()))

    if missing_logo_ids:
        logger.warning(f"Missing logo IDs in response: {missing_logo_ids}")

    saved = 0
    for logo_id, logo_results in results.items():
        if logo_id in logo_id_to_logo:
            logo = logo_id_to_logo[logo_id]
            distances = [n["distance"] for n in logo_results]
            logo_ids = [n["logo_id"] for n in logo_results]
            logo.nearest_neighbors = {
                "distances": distances,
                "logo_ids": logo_ids,
            }
            logo.save()
            saved += 1

    return saved
def fetch_images_for_ean(ean: str):
    url = (
        "https://world.openfoodfacts.org/api/v0/product/"
        "{}.json?fields=images".format(ean)
    )
    images = http_session.get(url).json()
    return images
def iter_products(country: str, ingredient: str):
    ingredient_field = f"ingredients_text_{country}"
    base_url = BaseURLProvider().country(country).get() + "/ingredient"
    url = base_url + f"/{ingredient}/1.json?fields=code,{ingredient_field}"
    r = http_session.get(url)
    data = r.json()
    count = data["count"]
    page_size = data["page_size"]
    yield from data["products"]

    pages = count // page_size + int(count % page_size != 0)

    # Pages are 1-indexed and the first page was fetched above, so iterate up
    # to and including the last page.
    for page in range(2, pages + 1):
        url = base_url + f"/{ingredient}/{page}.json?fields=code,{ingredient_field}"
        r = http_session.get(url)
        data = r.json()
        yield from data["products"]
def iter_products(country: str, ingredient: str):
    ingredient_field = f"ingredients_text_{country}"
    base_url = f"https://{country}.openfoodfacts.org/ingredient"
    url = base_url + f"/{ingredient}/1.json?fields=code,{ingredient_field}"
    r = http_session.get(url)
    data = r.json()
    count = data["count"]
    page_size = data["page_size"]
    yield from data["products"]

    pages = count // page_size + int(count % page_size != 0)

    # Pages are 1-indexed and the first page was fetched above, so iterate up
    # to and including the last page.
    for page in range(2, pages + 1):
        url = base_url + f"/{ingredient}/{page}.json?fields=code,{ingredient_field}"
        r = http_session.get(url)
        data = r.json()
        yield from data["products"]
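# A minimal usage sketch for iter_products, assuming the function above is in
# scope; the "fr" country code and the "salt" ingredient slug are illustrative
# values, not taken from the code above.
for product in iter_products(country="fr", ingredient="salt"):
    # Each yielded item is a dict restricted to the requested fields.
    print(product["code"], product.get("ingredients_text_fr", ""))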
def save_nearest_neighbors(logos: List[LogoAnnotation]) -> int:
    logo_ids_params = ",".join(str(logo.id) for logo in logos)
    r = http_session.get(
        settings.BaseURLProvider().robotoff().get()
        + "/api/v1/ann/batch?logo_ids="
        + logo_ids_params,
        timeout=30,
    )
    r.raise_for_status()
    response = r.json()
    results = {int(key): value for key, value in response["results"].items()}

    logo_id_to_logo = {logo.id: logo for logo in logos}
    missing_logo_ids = set(logo_id_to_logo.keys()).difference(set(results.keys()))

    if missing_logo_ids:
        logger.warning("Missing logo IDs in response: %s", missing_logo_ids)

    saved = 0
    for logo_id, logo_results in results.items():
        if logo_id in logo_id_to_logo:
            logo = logo_id_to_logo[logo_id]
            distances = [n["distance"] for n in logo_results]
            logo_ids = [n["logo_id"] for n in logo_results]
            logo.nearest_neighbors = {
                "distances": distances,
                "logo_ids": logo_ids,
            }
            logo.save()
            saved += 1

    return saved
def get_product(
    barcode: str,
    fields: Optional[List[str]] = None,
    server: Optional[Union[ServerType, str]] = None,
    timeout: Optional[int] = 10,
) -> Optional[Dict]:
    fields = fields or []

    if server is None:
        server = ServerType.off

    url = get_api_product_url(server) + "/{}.json".format(barcode)

    if fields:
        # requests escapes commas in URLs, as expected, but the Open Food Facts
        # server does not recognize escaped commas.
        # See https://github.com/openfoodfacts/openfoodfacts-server/issues/1607
        url += "?fields={}".format(",".join(fields))

    r = http_session.get(url, timeout=timeout)

    if r.status_code != 200:
        return None

    data = r.json()

    if data["status_verbose"] != "product found":
        return None

    return data["product"]
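# A minimal usage sketch for get_product, assuming the function above is in
# scope and the default server is used; the barcode and field names are
# illustrative only.
product = get_product("3017620422003", fields=["product_name", "ingredients_text"])
if product is None:
    print("product not found")
else:
    print(product.get("product_name"))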
def get_json_for_image(barcode: str, image_id: str) -> Optional[JSONType]:
    url = generate_json_ocr_url(barcode, image_id)
    r = http_session.get(url)

    if r.status_code == 404:
        return None

    return r.json()
def download_dataset(output_path: os.PathLike) -> str:
    r = http_session.get(settings.JSONL_DATASET_URL, stream=True)
    current_etag = r.headers.get("ETag", "").strip("'\"")

    logger.info("Dataset has changed, downloading file")
    logger.debug("Saving temporary file in {}".format(output_path))

    with open(output_path, "wb") as f:
        shutil.copyfileobj(r.raw, f)

    return current_etag
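# A minimal sketch of how a caller might use the returned ETag to decide
# whether to keep the downloaded dump. The previous_etag value and the
# temporary-file handling are assumptions for illustration, not part of the
# function above.
import tempfile

previous_etag = ""  # e.g. loaded from a metadata file kept next to the dump
with tempfile.NamedTemporaryFile(suffix=".jsonl.gz", delete=False) as tmp:
    new_etag = download_dataset(tmp.name)

if new_etag and new_etag != previous_etag:
    print("dataset changed, keeping new dump at", tmp.name)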
def get_stored_logo_ids() -> Set[int]:
    r = http_session.get(
        "https://robotoff.openfoodfacts.org/api/v1/ann/stored", timeout=30
    )

    if not r.ok:
        logger.warning(
            f"error while fetching stored logo IDs ({r.status_code}): {r.text}"
        )
        return set()

    return set(r.json()["stored"])
def get_stored_logo_ids() -> Set[int]:
    r = http_session.get(
        settings.BaseURLProvider().robotoff().get() + "/api/v1/ann/stored",
        timeout=30,
    )

    if not r.ok:
        logger.warning(
            "error while fetching stored logo IDs (%s): %s", r.status_code, r.text
        )
        return set()

    return set(r.json()["stored"])
def move_to(barcode: str, to: ServerType, timeout: Optional[int] = 10) -> bool:
    if get_product(barcode, server=to) is not None:
        return False

    url = "{}/cgi/product_jqm.pl".format(settings.OFF_BASE_WEBSITE_URL)
    params = {
        "type": "edit",
        "code": barcode,
        "new_code": to,
        **AUTH_DICT,
    }
    r = http_session.get(url, params=params, timeout=timeout)
    data = r.json()
    return data["status"] == 1
def move_to(barcode: str, to: ServerType, timeout: Optional[int] = 10) -> bool:
    if get_product(barcode, server=to) is not None:
        return False

    url = "{}/cgi/product_jqm.pl".format(settings.BaseURLProvider().get())
    params = {
        "type": "edit",
        "code": barcode,
        "new_code": str(to),
        **settings.off_credentials(),
    }
    r = http_session.get(url, params=params, timeout=timeout)
    data = r.json()
    return data["status"] == 1
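# A minimal usage sketch for move_to, assuming that ServerType exposes an
# "obf" member for Open Beauty Facts (an assumption, not shown above); the
# barcode is illustrative only.
moved = move_to("3017620422003", ServerType.obf)
print("moved" if moved else "not moved (product already exists on target)")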
def update_product(
    params: Dict,
    server_domain: Optional[str] = None,
    auth: Optional[OFFAuthentication] = None,
    timeout: Optional[int] = 15,
):
    if server_domain is None:
        server_domain = settings.OFF_SERVER_DOMAIN

    url = get_product_update_url(server_domain)

    comment = params.get("comment")
    cookies = None

    if auth is not None:
        if auth.session_cookie:
            cookies = {
                "session": auth.session_cookie,
            }
        elif auth.username:
            params["user_id"] = auth.username
            params["password"] = auth.password
    else:
        params.update(settings.off_credentials())

        if comment:
            params["comment"] = comment + " (automated edit)"

    if cookies is None and not params.get("password"):
        raise ValueError(
            "a password or a session cookie is required to update a product"
        )

    request_auth: Optional[Tuple[str, str]] = None
    if server_domain.endswith("openfoodfacts.net"):
        # dev environment requires authentication
        request_auth = ("off", "off")

    r = http_session.get(
        url, params=params, auth=request_auth, cookies=cookies, timeout=timeout
    )
    r.raise_for_status()
    json = r.json()

    status = json.get("status_verbose")

    if status != "fields saved":
        logger.warning(f"Unexpected status during product update: {status}")
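# A minimal usage sketch for update_product, assuming that global credentials
# are configured via settings.off_credentials() and that "quantity" is a field
# accepted by the product_jqm.pl endpoint (both are assumptions here); the
# barcode is illustrative only.
update_product(
    {
        "code": "3017620422003",
        "quantity": "400 g",
        "comment": "[robotoff] quantity update",
    }
)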
def fetch_taxonomy(url: str, fallback_path: str, offline=False) -> Optional[Taxonomy]:
    if offline:
        return Taxonomy.from_json(fallback_path)

    try:
        r = http_session.get(url, timeout=5)
        data = r.json()
    except Exception:
        logger.warning("Timeout while fetching '{}' taxonomy".format(url))

        if fallback_path:
            return Taxonomy.from_json(fallback_path)
        else:
            return None

    return Taxonomy.from_dict(data)
def get_random_insight(
    insight_type: Optional[str] = None, country: Optional[str] = None
) -> JSONType:
    params = {}

    if insight_type:
        params["type"] = insight_type

    if country:
        params["country"] = country

    r = http_session.get(RANDOM_INSIGHT_URL, params=params)
    data = r.json()

    if data["status"] == "no_insights":
        raise NoInsightException()

    return data["insight"]
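# A minimal usage sketch for get_random_insight, assuming the function and
# NoInsightException above are in scope; the insight type and country values
# are illustrative only.
try:
    insight = get_random_insight(insight_type="category", country="en:france")
    print(insight)
except NoInsightException:
    print("no insight available")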
def save_image(
    directory: pathlib.Path, image_meta: JSONType, barcode: str, override: bool = False
):
    image_id = image_meta["imgid"]
    image_name = "{}_{}.jpg".format(barcode, image_id)
    image_path = directory / image_name

    if image_path.exists() and not override:
        return

    image_url = generate_image_url(barcode, image_id)
    logger.info("Downloading image {}".format(image_url))
    r = http_session.get(image_url)

    with open(str(image_path), "wb") as fd:
        logger.info("Saving image in {}".format(image_path))
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)
def fetch_taxonomy(url: str, fallback_path: str, offline=False) -> Optional[Taxonomy]:
    if offline:
        return Taxonomy.from_json(fallback_path)

    try:
        r = http_session.get(url, timeout=120)  # might take some time
        if r.status_code >= 300:
            raise requests.HTTPError(
                f"Taxonomy download at {url} returned status code {r.status_code}"
            )
        data = r.json()
    except Exception as e:
        logger.exception(
            "%s exception while fetching taxonomy at %s", type(e).__name__, url
        )

        if fallback_path:
            return Taxonomy.from_json(fallback_path)
        else:
            return None

    return Taxonomy.from_dict(data)
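# A minimal usage sketch for fetch_taxonomy; the categories taxonomy URL is
# the public Open Food Facts dump, and the fallback path is an assumption
# about the local layout, not taken from the code above.
taxonomy = fetch_taxonomy(
    "https://static.openfoodfacts.org/data/taxonomies/categories.full.json",
    fallback_path="data/taxonomies/categories.full.json",
)
if taxonomy is None:
    print("taxonomy could not be fetched and no fallback was available")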
def get_ocr_result(ocr_url: str) -> Optional[ocr.OCRResult]:
    r = http_session.get(ocr_url)
    r.raise_for_status()
    ocr_data: Dict = r.json()
    return ocr.OCRResult.from_json(ocr_data)
def fetch_images_for_ean(ean: str):
    url = BaseURLProvider().get() + "/api/v0/product/{}.json?fields=images".format(ean)
    images = http_session.get(url).json()
    return images