Beispiel #1
0
def ocr_iter(
    source: Union[str, TextIO, pathlib.Path]
) -> Iterable[Tuple[Optional[str], Dict]]:
    """Yield ``(source, ocr_json)`` pairs for the given input.

    The kind of input is decided in this order:

    * ``pathlib.Path``: handed to ``jsonl_iter``; the resulting items are
      forwarded through ``ocr_content_iter``.
    * any other non-``str`` object (annotated as a text stream): handed to
      ``jsonl_iter_fp``; the items are forwarded through
      ``ocr_content_iter``.
    * ``str`` accepted by ``is_barcode``: the images listed for that barcode
      are fetched and one pair is yielded for every all-digit image ID whose
      OCR JSON is truthy.
    * any other ``str``: interpreted as a filesystem path. A directory is
      searched recursively for ``*.json`` files. A single file is parsed as
      JSON when ``.json`` is among its suffixes (yielded with ``None`` as
      the source), or read through ``jsonl_iter`` when ``.jsonl`` is.

    A path that does not exist is reported on stdout and yields nothing. An
    existing file with neither suffix also yields nothing.

    Args:
        source: A path object, an open stream, a barcode string, or a path
            string, as described above.

    Yields:
        Tuples whose first element is the value returned by ``get_source``
        (or ``None`` for a single ``.json`` file) and whose second element
        is the parsed OCR JSON. For the JSONL inputs the tuples are whatever
        ``ocr_content_iter`` produces.
    """
    if isinstance(source, pathlib.Path):
        yield from ocr_content_iter(jsonl_iter(source))
        return

    if not isinstance(source, str):
        yield from ocr_content_iter(jsonl_iter_fp(source))
        return

    if is_barcode(source):
        barcode = source
        images = fetch_images_for_ean(barcode)["product"]["images"]

        for image_id in images:
            # Only keys made up solely of digits are processed; any other
            # key in the mapping is skipped.
            if not image_id.isdigit():
                continue

            print("Getting OCR for image {}".format(image_id))
            ocr_json = get_json_for_image(barcode, image_id)
            image_source = get_source(image_id, barcode=barcode)
            if ocr_json:
                yield image_source, ocr_json
        return

    input_path = pathlib.Path(source)

    if not input_path.exists():
        print("Unrecognized input: {}".format(input_path))
        return

    if input_path.is_dir():
        for json_path in input_path.glob("**/*.json"):
            # Read the whole file and close it *before* yielding, so no file
            # handle stays open while the generator is suspended.
            raw = json_path.read_bytes()
            item_source = get_source(json_path.stem, json_path=str(json_path))
            yield item_source, orjson.loads(raw)
    elif ".json" in input_path.suffixes:
        # Same as above: the file is closed before control leaves the
        # generator.
        yield None, orjson.loads(input_path.read_bytes())
    elif ".jsonl" in input_path.suffixes:
        yield from ocr_content_iter(jsonl_iter(input_path))
Beispiel #2
0
 def from_jsonl_fp(self, fp):
     """Import the items read from ``fp`` as non-automatic insights.

     ``fp`` is passed to ``jsonl_iter_fp`` and the result is handed to
     ``self.import_insights`` with ``automatic=False``.
     """
     parsed_items = jsonl_iter_fp(fp)
     self.import_insights(
         parsed_items,
         automatic=False,
     )
Beispiel #3
0
 def from_jsonl_fp(self, fp, server_domain: str):
     """Import the items read from ``fp`` as non-automatic insights.

     ``fp`` is passed to ``jsonl_iter_fp`` and the result is handed to
     ``self.import_insights`` together with ``server_domain`` and
     ``automatic=False``.
     """
     parsed_items = jsonl_iter_fp(fp)
     self.import_insights(
         parsed_items,
         server_domain=server_domain,
         automatic=False,
     )