Beispiel #1
0
class RegionVisualizer(object):
    """Draw regions (outline or filled shape, plus optional text) onto an image.

    Settings are read from a ``Configuration`` built from the caller's
    ``config`` and ``DEFAULT_CONFIG``. Keys used here: ``filled``, ``large``,
    ``text`` and ``store``.
    """

    def __init__(self, config=None):
        """Create the visualizer.

        Args:
            config: Optional settings mapping. ``None`` means no overrides.
        """
        # Build a fresh dict per instance instead of sharing one mutable
        # default object between every visualizer created without arguments.
        self.config = Configuration({} if config is None else config,
                                    DEFAULT_CONFIG)

    def __call__(self, image, regions, is_gt=False):
        """Draw every region of ``regions`` onto ``image`` and return ``image``.

        Args:
            image: Image that is drawn on in place.
            regions: Iterable of region objects.
            is_gt: Whether the regions are ground truth (selects the colour).
        """
        for region in regions:
            self._viz_region(image, region, is_gt)
        return image

    def _draw_lines(self, image, region, color):
        """Draw the region's shape: its polygon path if present, else a rectangle."""
        filled = self.config["filled"]
        # len() is used instead of truthiness because the path may be an array.
        if len(region.path) > 0:
            polygon = [np.array(region.path)]
            if filled:
                cv2.fillPoly(image, polygon, color)
            else:
                cv2.polylines(image, polygon, 1, color)
        else:
            # A thickness of -1 asks OpenCV to fill the rectangle.
            cv2.rectangle(image, region.pos, region.get_bottom_right(), color,
                          -1 if filled else 1)

    def _color(self, region, is_gt=False):
        """Return the colour tuple for ``region``.

        Ground truth gets ``(0, 255, 0)``; regions whose ``cls`` equals 0 get
        ``(255, 0, 0)``; everything else gets ``(0, 0, 255)``.
        """
        if is_gt:
            return (0, 255, 0)
        if region.cls is not None and region.cls == 0:
            return (255, 0, 0)
        return (0, 0, 255)

    def _draw_text(self, image, region, color):
        """Write ``region.text`` next to the region.

        Text is only drawn when the region has text, its ``cls`` is ``None``
        or 1, and the ``text`` setting (default ``True``) is enabled.
        """
        if region.text is None:
            return
        if not (region.cls is None or region.cls == 1):
            return
        if not self.config.default("text", True):
            return

        x, y = region.pos
        scale = 2 if self.config["large"] else 1
        thickness = 2 if self.config["large"] else 1
        reloc = 5 * scale
        # place text below if there is not enough space above
        if y - (20 + reloc) < 0:
            y = y + reloc + region.size[1]
        else:
            y = y - reloc
        cv2.putText(image, region.text, (x, y), cv2.FONT_HERSHEY_PLAIN,
                    scale, color, thickness)

    def _viz_region(self, image, region, is_gt=False):
        """Draw shape and text of a single region using its class colour."""
        color = self._color(region, is_gt)
        self._draw_lines(image, region, color)
        self._draw_text(image, region, color)

    def store(self, vizimage, original_file):
        """Write ``vizimage`` into the ``store`` directory, if one is configured.

        The output keeps the base name of ``original_file``; the directory is
        created when missing.
        """
        if self.config.default("store", False):
            target_dir = self.config["store"]
            os.makedirs(target_dir, exist_ok=True)
            filename = os.path.basename(original_file)
            cv2.imwrite(os.path.join(target_dir, filename), vizimage)
class Layer(object, metaclass=abc.ABCMeta):
    """Abstract base class for configurable layers.

    Subclasses must implement ``__call__``. Settings are looked up in the
    layer's own configuration first and fall back to the supplied defaults.
    """

    # NOTE: the metaclass is passed via the class keyword because a
    # ``__metaclass__`` attribute is ignored by Python 3, which would leave
    # ``@abc.abstractmethod`` unenforced.

    def __init__(self, config, defaults, data_format='nhwc'):
        """Store the configuration, its defaults and the data format.

        Args:
            config: Layer-specific settings.
            defaults: Fallback settings used when ``config`` lacks a key.
            data_format: Data layout identifier, ``'nhwc'`` by default.
        """
        self._config = Configuration(config)
        self._defaults = Configuration(defaults)
        self._format = data_format

    def __getitem__(self, key):
        """Return the setting for ``key``: config value, else default, else ``None``."""
        fallback = self._defaults.default(key, None)
        return self._config.default(key, fallback)

    def _parse_format(self):
        """Return ``'channels_first'`` for format ``'nchw'``, else ``'channels_last'``."""
        if self._format == 'nchw':
            return 'channels_first'
        return 'channels_last'

    @abc.abstractmethod
    def __call__(self, x, is_train):
        """Apply the layer to ``x``; must be implemented by subclasses."""
class SeparatedVisualizer(object):
    """Visualizer that places the original and the merged image side by side."""

    def __init__(self, config=None):
        """Create the visualizer.

        Args:
            config: Optional settings mapping. ``None`` means no overrides.
        """
        # Build a fresh dict per instance instead of sharing one mutable
        # default object between all instances.
        self.config = Configuration({} if config is None else config,
                                    DEFAULT_CONFIG)

    def __call__(self, original, merged, is_gt=False):
        """Return ``original`` and ``merged`` concatenated along axis 1.

        A three-channel ``original`` is converted to grayscale first.
        ``is_gt`` is accepted for interface parity and not used.
        """
        if len(original.shape) > 2 and original.shape[2] == 3:
            original = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
        return np.concatenate((original, merged), axis=1)

    def store(self, vizimage, original_file):
        """Write ``vizimage`` into the ``store`` directory, if one is configured.

        The output keeps the base name of ``original_file``; the directory is
        created when missing.
        """
        if self.config.default("store", False):
            target_dir = self.config["store"]
            os.makedirs(target_dir, exist_ok=True)
            filename = os.path.basename(original_file)
            cv2.imwrite(os.path.join(target_dir, filename), vizimage)
class AlgorithmBase(object, metaclass=abc.ABCMeta):
    """Abstract base class for algorithms that build a graph.

    Subclasses must implement ``build_graph``. Settings are looked up in the
    algorithm's own configuration first and fall back to the defaults.
    """

    # NOTE: the metaclass is passed via the class keyword because a
    # ``__metaclass__`` attribute is ignored by Python 3, which would leave
    # ``@abc.abstractmethod`` unenforced.

    # Class-level default; ``set_cpu`` overrides it per instance.
    _cpu = False

    def set_cpu(self, is_cpu):
        """Record on this instance whether the CPU flag is set."""
        self._cpu = is_cpu

    def __init__(self, config, defaults):
        """Store the configuration and its defaults.

        Args:
            config: Algorithm-specific settings.
            defaults: Fallback settings used when ``config`` lacks a key.
        """
        self._config = Configuration(config)
        self._defaults = Configuration(defaults)

    def __getitem__(self, key):
        """Return the setting for ``key``: config value, else default, else ``None``."""
        fallback = self._defaults.default(key, None)
        return self._config.default(key, fallback)

    @abc.abstractmethod
    def build_graph(self):
        """Build the algorithm's graph; must be implemented by subclasses."""
Beispiel #5
0
class ImageVisualizer(object):
    """Visualizer that passes the merged image through unchanged."""

    def __init__(self, config=None):
        """Create the visualizer.

        Args:
            config: Optional settings mapping. ``None`` means no overrides.
        """
        # Build a fresh dict per instance instead of sharing one mutable
        # default object between all instances.
        self.config = Configuration({} if config is None else config,
                                    DEFAULT_CONFIG)

    def __call__(self, original, merged, is_gt=False):
        """Return ``merged``; ``original`` and ``is_gt`` are not used."""
        return merged

    def store(self, vizimage, original_file):
        """Write ``vizimage`` into the ``store`` directory, if one is configured.

        The output keeps the base name of ``original_file``; the directory is
        created when missing.
        """
        if self.config.default("store", False):
            target_dir = self.config["store"]
            os.makedirs(target_dir, exist_ok=True)
            filename = os.path.basename(original_file)
            cv2.imwrite(os.path.join(target_dir, filename), vizimage)
Beispiel #6
0
class PaperNoteWords(Dataset):
    """Dataset of word images stored as PNG files in subset directories.

    The ``train``/``dev``/``test`` sets carry a text as truth, which is
    compiled into vocabulary indices. The ``print_*`` sets carry a class
    flag as truth: 1 for files from the plain subset directory and 0 for
    files from the ``print_``-prefixed directory.
    """

    def __init__(self, **kwargs):
        """Load all file lists and compile the text labels.

        Keyword Args:
            paper_note_path: Root directory holding the subset folders
                (default ``'../paper-notes/data/words'``).
            meta: Mapping; ``height`` and ``width`` are read when loading
                images.
            data_config: Preprocessing and augmentation settings.
            vocab: Indexable pair where ``vocab[1]`` maps a character to its
                index and ``vocab[0]`` maps ``str(index)`` to the character.
            pure: When true (default), word lists are read from the
                ``pure_``-prefixed subsets.
            max_length: Length that compiled label sequences are padded to.
        """
        self.paper_note_path = kwargs.get('paper_note_path',
                                          '../paper-notes/data/words')
        self.meta = Configuration(kwargs.get('meta', {}))
        self.data_config = Configuration(kwargs.get('data_config', {}))
        self.vocab = kwargs.get('vocab', {})
        self.pure = kwargs.get('pure', True)

        # NOTE(review): ``compile`` needs this to be an int; it is ``None``
        # when the caller does not pass ``max_length``.
        self.max_length = kwargs.get('max_length')
        self._load_data()
        self._compile_sets()
        self.augmenter = ImageAugmenter(self.data_config)

    def info(self):
        """Intentionally does nothing."""
        pass

    def _compile_set(self, dataset):
        """Attach the compiled label under ``'compiled'`` to each item of ``dataset``."""
        for item in self.data[dataset]:
            item['compiled'] = self.compile(item['truth'])

    def _compile_sets(self):
        """Compile the labels of the three text sets."""
        for name in ("train", "dev", "test"):
            self._compile_set(name)

    def _load_data(self):
        """Populate ``self.data`` with the word sets and the ``print_*`` class sets."""
        prefix = "pure_" if self.pure else ""
        self.data = {
            "dev": self._load_wordlist("{}dev".format(prefix)),
            "train": self._load_wordlist("{}train".format(prefix)),
            "test": self._load_wordlist("{}test".format(prefix)),
            "print_dev": self._load_classlist("dev"),
            "print_test": self._load_classlist("test"),
            "print_train": self._load_classlist("train"),
        }

    def _load_wordlist(self, subset):
        """Return file objects for every entry of the subset's ``words`` JSON.

        Each key names a ``<key>.png`` file in the subset directory and its
        value is used as the truth.
        """
        basepath = os.path.join(self.paper_note_path, subset)
        words = util.loadJson(basepath, "words")
        return [
            self._fileobj(basepath, "{}.png".format(word), words[word])
            for word in words
        ]

    def _load_classlist(self, subset):
        """Return files of ``subset`` (truth 1) plus at most as many ``print_`` files (truth 0)."""
        files = self._load_filelist(subset, 1)
        files.extend(
            self._load_filelist("print_{}".format(subset), 0, len(files)))
        return files

    def _load_filelist(self, subset, is_htr, length=None) -> list:
        """Return up to ``length`` randomly chosen PNG file objects of ``subset``.

        Args:
            subset: Directory name below ``paper_note_path``.
            is_htr: Value stored as the truth of every returned file object.
            length: Maximum number of files; ``None`` means all of them.

        Returns:
            A list of file objects; empty when the directory does not exist.
        """
        basepath = os.path.join(self.paper_note_path, subset)
        if not os.path.exists(basepath):
            return []
        # Filter before truncating so that non-PNG entries in the directory
        # do not count against the requested length.
        png_files = [
            name for name in os.listdir(basepath) if name.endswith(".png")
        ]
        shuffle(png_files)
        if length is not None:
            png_files = png_files[:length]
        return [self._fileobj(basepath, name, is_htr) for name in png_files]

    def _fileobj(self, basepath: str, filename: str, truth):
        """Build the dict describing one sample (``path`` and ``truth``)."""
        return {
            "path": os.path.join(basepath, filename),
            "truth": truth,
        }

    def compile(self, text):
        """Convert ``text`` to vocabulary indices, padded with -1 to ``max_length``."""
        parsed = [self.vocab[1][c] for c in text]
        parsed.extend([-1] * (self.max_length - len(text)))
        return parsed

    def decompile(self, values):
        """Convert indices back to text; indices missing from the vocabulary are dropped."""
        def getKey(key):
            try:
                return self.vocab[0][str(key)]
            except KeyError:
                return ''

        return ''.join([getKey(c) for c in values])

    def getBatchCount(self, batch_size, max_batches=0, dataset="train"):
        """Return the number of batches in ``dataset``, capped by ``max_batches`` if > 0."""
        total_len = len(self.data[dataset])
        num_batches = int(math.ceil(float(total_len) / batch_size))
        if max_batches > 0:
            return min(num_batches, max_batches)
        return num_batches

    def generateBatch(self,
                      batch_size,
                      max_batches=0,
                      dataset="train",
                      with_filepath=False,
                      augmentable=False):
        """Yield the batches of ``dataset`` one after another.

        The set is shuffled in place first when the ``shuffle_epoch`` setting
        is enabled.
        """
        num_batches = self.getBatchCount(batch_size, max_batches, dataset)
        if self.data_config.default('shuffle_epoch', False):
            shuffle(self.data[dataset])
        for b in range(num_batches):
            yield self._load_batch(b,
                                   batch_size,
                                   dataset,
                                   with_filepath,
                                   augmentable=augmentable)

    def load_image(self, path, transpose=False, augmentable=False):
        """Read, preprocess and optionally augment the image at ``path``.

        Args:
            path: Image file to read as grayscale.
            transpose: Unused.
            augmentable: Allow on-the-fly augmentation when the
                ``otf_augmentations`` setting is present.

        Returns:
            The image padded to the ``meta`` width and height with a trailing
            channel axis, or ``None`` when the file cannot be read, is empty
            or preprocessing rejects it.
        """
        padding = self.data_config.default('preprocess.padding', 0)
        target_size = (int(self.meta["height"] - (padding * 2)),
                       int(self.meta["width"] - (padding * 2)))
        x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if x is None or x.shape[0] == 0 or x.shape[1] == 0:
            return None
        x = self.augmenter.preprocess(x, target_size)
        if x is None:
            return None
        if self.data_config.default("otf_augmentations",
                                    False) and augmentable:
            x = self.augmenter.augment(x)
        else:
            x = self.augmenter.add_graychannel(x)

        if x.shape[1] != self.meta["width"] or x.shape[0] != self.meta[
                "height"]:
            x = self.augmenter.pad_to_size(x,
                                           width=self.meta["width"],
                                           height=self.meta["height"])

        return self.augmenter.add_graychannel(x)

    def _loadline(self, line, transpose=True, augmentable=False):
        """Load a text sample: image, compiled label, text length and path."""
        l = len(line["truth"])
        y = np.asarray(line["compiled"])
        x = self.load_image(line["path"], augmentable=augmentable)
        return x, y, l, line["path"]

    def _loadprintline(self, line, transpose=True, augmentable=False):
        """Load a class sample: image, ``[truth]``, a length of 0 and the path."""
        y = line["truth"]
        x = self.load_image(line["path"], augmentable=augmentable)
        return x, [y], 0, line["path"]

    def _load_batch(self,
                    index,
                    batch_size,
                    dataset,
                    with_filepath=False,
                    augmentable=False):
        """Load batch number ``index`` of ``dataset``.

        Samples whose image could not be loaded are left out, so a batch may
        be smaller than ``batch_size``.

        Returns:
            ``(X, Y, L)`` as arrays, plus the list of file paths when
            ``with_filepath`` is true.
        """
        X = []
        Y = []
        L = []
        F = []

        # The "print_" sets carry a class flag instead of a text label.
        parseline = self._loadline if not dataset.startswith(
            "print_") else self._loadprintline

        first = index * batch_size
        last = min((index + 1) * batch_size, len(self.data[dataset]))
        for idx in range(first, last):
            x, y, l, f = parseline(self.data[dataset][idx],
                                   augmentable=augmentable)
            if x is not None:
                X.append(x)
                Y.append(y)
                L.append(l)
                F.append(f)
        X = np.asarray(X)
        Y = np.asarray(Y)
        L = np.asarray(L)
        if not with_filepath:
            return X, Y, L
        return X, Y, L, F

    # deprecated

    def generateEpochs(self,
                       batch_size,
                       num_epochs,
                       max_batches=0,
                       dataset="train",
                       with_filepath=False,
                       augmentable=False):
        """Yield one batch generator per epoch (deprecated)."""
        for _ in range(num_epochs):
            yield self.generateBatch(batch_size,
                                     max_batches=max_batches,
                                     dataset=dataset,
                                     with_filepath=with_filepath,
                                     augmentable=augmentable)
Beispiel #7
0
class E2ERunner(object):
    """Run a configured chain of processing blocks over a set of image files.

    The configuration selects the blocks and, optionally, a visualizer, a
    ground-truth provider and a list of evaluators.
    """

    def __init__(self, config=None, globalConfig=None):
        """Parse the configuration and set up the logger.

        Args:
            config: Runner settings (``blocks``, ``data``, ``viz``, ``gt``,
                ``eval``, ``ranger``). ``None`` means an empty mapping.
            globalConfig: Settings handed to the blocks that accept a global
                configuration. ``None`` means an empty mapping.
        """
        # Fresh dicts per instance; the range mode writes into the config, so
        # a shared mutable default would leak between runners.
        self.config = Configuration({} if config is None else config)
        self.globalConfig = Configuration(
            {} if globalConfig is None else globalConfig)
        self._parse_config()
        self.logger = Logger()
        # NOTE(review): calling the Configuration presumably prints or logs
        # it — confirm against Configuration.__call__.
        self.config()

    def _parse_config(self):
        """(Re)build blocks, visualizer, ground-truth provider and evaluators."""
        self._parse_blocks(self.config["blocks"])
        self.viz = self._parse_visualizer(self.config.default("viz", None))
        self.gtprov = self._parse_gt(self.config.default("gt", None))
        self.evals = self._parse_evals(self.config.default('eval', []))

    def _parse_blocks(self, blocks):
        """Instantiate every block that is not marked as ``disabled``."""
        self.blocks = [
            self._parse_block(block) for block in blocks
            if "disabled" not in block or not block["disabled"]
        ]

    def _parse_block(self, block):
        """Create the block named by ``block["type"]``; unknown types yield ``None``."""
        block_type = block["type"]
        if block_type == "TextSeparation":
            return TextSeparation(self.globalConfig, block)
        if block_type == "WordDetection":
            return WordDetection(block)
        if block_type == "LineSegmentation":
            return LineSegmentation(block)
        if block_type == "ParagraphDetection":
            return ParagraphDetection(block)
        if block_type == "UnigramLanguageModel":
            return UnigramLanguageModel(block)
        if block_type == "Ceiling":
            return Ceiling(block)
        if block_type == "TranscriptionAndClassification":
            return TranscriptionAndClassification(self.globalConfig, block)
        return None

    def _parse_evals(self, eval_configs):
        """Create one evaluator per entry of ``eval_configs``."""
        return [self._parse_eval(config) for config in eval_configs]

    def _parse_eval(self, config):
        """Create the evaluator named by ``config["type"]``; ``None`` if absent or unknown."""
        if config is None:
            return None
        eval_type = config["type"]
        if eval_type == "IoU":
            return IoU(config)
        if eval_type == "IoUPixelSum":
            return IoUPixelSum(config)
        if eval_type == "BagOfWords":
            return BagOfWords(config)
        if eval_type == "IoUCER":
            return IoUCER(config)
        return None

    def _parse_data(self, data_config):
        """Resolve the list of input files.

        A list is returned unchanged. Otherwise the files in
        ``data_config["path"]`` whose names end with ``suffix`` and start
        with the optional ``prefix`` are returned as joined paths, truncated
        to ``limit`` entries when ``limit`` is greater than zero.
        """
        if isinstance(data_config, list):
            return data_config
        prefix = data_config["prefix"] if "prefix" in data_config else ""
        filenames = [
            name for name in os.listdir(data_config["path"])
            if name.endswith(data_config["suffix"]) and name.startswith(prefix)
        ]
        if data_config["limit"] > 0:
            filenames = filenames[:data_config["limit"]]
        return [
            os.path.join(data_config["path"], filename)
            for filename in filenames
        ]

    def _parse_visualizer(self, viz_config):
        """Create the visualizer named by ``viz_config["type"]``; ``None`` if absent or unknown."""
        if viz_config is None:
            return None
        viz_type = viz_config["type"]
        if viz_type == "RegionVisualizer":
            return RegionVisualizer(viz_config)
        if viz_type == "ImageVisualizer":
            return ImageVisualizer(viz_config)
        if viz_type == "SeparatedVisualizer":
            return SeparatedVisualizer(viz_config)
        return None

    def _parse_gt(self, gt_config):
        """Create the ground-truth provider named by ``gt_config["type"]``; ``None`` if absent or unknown."""
        if gt_config is None:
            return None
        gt_type = gt_config["type"]
        if gt_type == "WordRegion":
            return WordRegionGTProvider()
        if gt_type == "ParagraphRegion":
            return ParagraphRegionGTProvider()
        if gt_type == "LineRegion":
            return LineRegionGTProvider()
        return None

    def __call__(self, log_prefix="E2E", skip_range_evaluation=False):
        """Process all configured files.

        Args:
            log_prefix: Label used for progress and summary logging.
            skip_range_evaluation: Run once even if ``ranger`` is configured
                (used by the range mode itself).

        Returns:
            The list of per-file result dicts, or the result of the range
            execution when range mode is entered.
        """
        if not skip_range_evaluation and self.config.default("ranger", False):
            self.logger.write("Entering Range Execution Mode")
            return self._range_exec()
        start = time()
        self.scores = {}
        data = self._parse_data(self.config["data"])
        results = []
        times = []
        for idx, file in enumerate(data):
            file_time = time()
            self.logger.progress(log_prefix, idx, len(data))
            results.append(self._exec(file))
            times.append(time() - file_time)
        for block in self.blocks:
            block.close()
        if len(self.evals) > 0:
            final_scores = {
                "time": time() - start,
                "median time": np.median(times),
                "avg time": np.average(times)
            }
            for score_key in self.scores:
                final_scores[score_key] = np.average(self.scores[score_key])
            self.logger.summary(log_prefix, final_scores)
        return results

    def _get_range(self):
        """Return the values to sweep over in range mode.

        A dict under ``ranger.values`` is expanded with ``frange`` from its
        ``from``/``to``/``step`` entries; any other value is returned as is
        so that an explicit list of values can be iterated directly.
        """
        values = self.config["ranger.values"]
        if isinstance(values, dict):
            return frange(self.config["ranger.values.from"],
                          self.config["ranger.values.to"],
                          self.config["ranger.values.step"])
        return values

    def _range_exec(self):
        """Run the pipeline once per range value, writing each value into the config."""
        def set_config(value):
            # Each path is a sequence of keys leading to the setting to set.
            for path in self.config.default(
                    "ranger.paths", [self.config.default("ranger.path", [])]):
                current = self.config
                for step in path[:-1]:
                    current = current[step]
                current[path[-1]] = value
            self._parse_config()

        for val in self._get_range():
            set_config(val)
            prefix = self.config.default("ranger.template", "value {}")
            self(log_prefix=prefix.format(val), skip_range_evaluation=True)

    def _exec(self, file):
        """Run all blocks on one file, then visualize and evaluate the result.

        Returns:
            A dict with ``file``, ``original``, ``result`` and, when a
            visualizer is configured, ``viz``.

        Raises:
            IOError: If the image cannot be read.
        """
        original = cv2.imread(file)
        if original is None:
            # cv2.imread signals failure with None rather than an exception.
            raise IOError("Could not read image: {}".format(file))
        last_output = original.copy()

        for block in self.blocks:
            last_output = block(last_output, file)
        res = {"file": file, "original": original, "result": last_output}
        # Always bind gt so that evaluators configured without a ground-truth
        # provider receive None instead of hitting an unbound local.
        gt = None
        if self.gtprov is not None:
            gt = self.gtprov(file, original)
        if self.viz is not None:
            vizimage = res["original"].copy()
            if self.gtprov is not None and self.config.default(
                    'gt.viz', False):
                vizimage = self.viz(vizimage, gt, True)
            if len(self.blocks) > 0:
                vizimage = self.viz(vizimage, res["result"], False)
            self.viz.store(vizimage, file)
            res["viz"] = vizimage
        for evl in self.evals:
            scores = evl(gt, res["result"])
            for score_key, value in scores.items():
                # Newest score first, matching the established ordering.
                self.scores[score_key] = [
                    value, *self.scores.get(score_key, [])
                ]
        return res
Beispiel #8
0
class ImageAugmenter(object):
    """Preprocess, augment and pad images according to a configuration.

    Settings are read from the ``preprocess``, ``postprocess`` and
    ``otf_augmentations`` sections of the configuration.
    """

    def __init__(self, config):
        """Wrap ``config`` in a ``Configuration``."""
        self.config = Configuration(config)

    def augment(self, img, get_settings=False):
        """Apply the configured on-the-fly augmentations to ``img``.

        The steps run in a fixed order (warp, affine, morph, binarize, blur,
        sharpen, brighten, darken). Steps with a ``prob`` setting are applied
        randomly.

        Args:
            img: Image array to augment.
            get_settings: Also return the parameters that were applied so the
                same augmentation can be replayed with ``apply_augmentation``.

        Returns:
            The augmented image with a channel axis, or a tuple of that image
            and a ``Configuration`` of the applied settings.
        """
        augmentation_settings = {}
        enabled = self.config["otf_augmentations"]
        invert = self.config.default('preprocess.invert', False)

        if "warp" in enabled:
            if np.random.uniform() < self.config['otf_augmentations.warp.prob']:
                gridsize = self.config['otf_augmentations.warp.gridsize']
                # Warping operates on the inverted image unless the
                # preprocessing already inverted it.
                if not invert:
                    img = 255 - img
                reshaped = False
                if len(img.shape) > 2:
                    reshaped = True
                    img = np.reshape(img, (img.shape[0], img.shape[1]))
                img = convert._cv2pil(img)
                img, mat = warp._warp(
                    img,
                    gridsize=gridsize,
                    deviation=self.config['otf_augmentations.warp.deviation'],
                    return_mat=True)
                augmentation_settings["warp"] = {
                    "gridsize": gridsize,
                    "mat": mat
                }
                img = convert._pil2cv2(img)
                if reshaped:
                    img = np.reshape(img, (img.shape[0], img.shape[1], 1))
                if not invert:
                    img = 255 - img
        if "affine" in enabled:
            if invert:
                img = 255 - img
            img, mat = affine._affine(
                img, self.config["otf_augmentations.affine"], return_mat=True)
            augmentation_settings["affine"] = {
                "mat": mat
            }
            if invert:
                img = 255 - img
        if "morph" in enabled:
            img, op_name, op_values = morph._random_morph(
                img, self.config["otf_augmentations.morph"], invert, True)
            # Stored under "morph" so that the affine matrix above is kept
            # and apply_augmentation can find 'morph.op_name'.
            augmentation_settings["morph"] = {
                "op_name": op_name,
                "op_values": op_values
            }
        if "binarize" in enabled:
            if np.random.uniform() < self.config['otf_augmentations.binarize.prob']:
                img = binarize._binarize(img)
                augmentation_settings["binarize"] = {}
        if "blur" in enabled:
            if np.random.uniform() < self.config['otf_augmentations.blur.prob']:
                kernel = self.config['otf_augmentations.blur.kernel']
                sigma = self.config['otf_augmentations.blur.sigma']
                img = cv2.GaussianBlur(img, tuple(kernel), sigma)
                augmentation_settings["blur"] = {
                    "kernel": kernel,
                    "sigma": sigma
                }
        if "sharpen" in enabled:
            if np.random.uniform() < self.config['otf_augmentations.sharpen.prob']:
                kernel = self.config['otf_augmentations.sharpen.kernel']
                sigma = self.config['otf_augmentations.sharpen.sigma']
                img = self._unsharp_mask_filter(img, tuple(kernel), sigma)
                augmentation_settings["sharpen"] = {
                    "kernel": kernel,
                    "sigma": sigma
                }
        if "brighten" in enabled:
            if np.random.uniform() < self.config['otf_augmentations.brighten.prob']:
                factor = np.random.normal(
                    self.config['otf_augmentations.brighten.center'],
                    self.config['otf_augmentations.brighten.stdv'])
                # Factors below 1 are raised to 1.
                factor = factor if factor >= 1 else 1
                img = np.uint8(np.clip(img * factor, 0, 255))
                augmentation_settings["brighten"] = {
                    "factor": factor
                }
        if "darken" in enabled:
            if np.random.uniform() < self.config['otf_augmentations.darken.prob']:
                factor = np.random.normal(
                    self.config['otf_augmentations.darken.center'],
                    self.config['otf_augmentations.darken.stdv'])
                factor = factor if factor >= 1 else 1
                # Scale the inverted image, then invert back.
                img = 255 - np.uint8(np.clip((255 - img) * factor, 0.0, 255.0))
                augmentation_settings["darken"] = {
                    "factor": factor
                }
        if not get_settings:
            return self.add_graychannel(img)
        return self.add_graychannel(img), Configuration(augmentation_settings)

    def binarization(self, img):
        """Threshold ``img`` at 200 (on the non-inverted image) and add a channel axis."""
        invert = self.config.default('preprocess.invert', False)
        if invert:
            img = 255 - img
        _, img = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)
        if invert:
            img = 255 - img
        return self.add_graychannel(img)

    def apply_augmentation(self, img, settings):
        """Replay the augmentation described by ``settings`` on ``img``.

        Args:
            img: Image array to augment.
            settings: ``Configuration`` as returned by
                ``augment(..., get_settings=True)``.

        Returns:
            The augmented image with a channel axis.
        """
        invert = self.config.default('preprocess.invert', False)
        if settings.default("warp", False):
            if not invert:
                img = 255 - img
            reshaped = False
            if len(img.shape) > 2:
                reshaped = True
                img = np.reshape(img, (img.shape[0], img.shape[1]))
            img = convert._cv2pil(img)
            img = warp._warp(
                img,
                gridsize=settings['warp.gridsize'],
                mat=settings['warp.mat'])
            img = convert._pil2cv2(img)
            if reshaped:
                img = np.reshape(img, (img.shape[0], img.shape[1], 1))
            if not invert:
                img = 255 - img
        if settings.default("affine", False):
            img = affine._affine(
                img, mat=settings["affine.mat"], background=255.0)
        if settings.default("morph", False):
            img = morph._morph(img, settings['morph.op_name'],
                               settings['morph.op_values'], invert)
        if settings.default("binarize", False):
            img = binarize._binarize(img)
        if settings.default("blur", False):
            img = cv2.GaussianBlur(
                img, tuple(settings['blur.kernel']), settings['blur.sigma'])
        if settings.default("sharpen", False):
            img = self._unsharp_mask_filter(
                img, tuple(settings['sharpen.kernel']), settings['sharpen.sigma'])
        if settings.default("brighten", False):
            img = np.uint8(
                np.clip(img * settings["brighten.factor"], 0.0, 255.0))
        if settings.default("darken", False):
            img = 255 - np.uint8(
                np.clip((255 - img) * settings["darken.factor"], 0.0, 255.0))
        return self.add_graychannel(img)

    def _unsharp_mask_filter(self, image, kernel, sigma):
        """Sharpen ``image`` as ``1.5 * image - 0.5 * blurred``.

        ``image`` is passed as the destination of ``cv2.addWeighted``.
        """
        gaussian_3 = cv2.GaussianBlur(image, kernel, sigma)
        return cv2.addWeighted(image, 1.5, gaussian_3, -0.5, 0, image)

    def add_graychannel(self, img):
        """Return ``img`` with a trailing channel axis of size 1 if it is 2-D."""
        if len(img.shape) == 2:
            return np.reshape(img, [img.shape[0], img.shape[1], 1])
        return img

    def pad_to_size(self, img, height, width):
        """Zero-pad ``img`` at the bottom/right to shape ``(height, width, 1)``."""
        return self._pad(img, (height, width, 1))

    def _scale(self, img, factor, target_size=None):
        """Resize ``img`` by ``1 / factor``; ``None`` if a side would vanish.

        ``target_size`` is unused.
        """
        height = int(img.shape[0] / factor)
        width = int(img.shape[1] / factor)
        if width <= 0 or height <= 0:
            return None
        return cv2.resize(img, (width, height))

    def _scale_img(self, img, scale_factor, target_size=None):
        """Shrink ``img`` by the largest of ``scale_factor`` and the ratios needed to fit ``target_size``.

        Returns ``None`` for an empty image or when scaling fails.
        """
        if img.shape[0] == 0 or img.shape[1] == 0:
            return None
        factor = max(img.shape[0] / target_size[0],
                     img.shape[1] / target_size[1],
                     scale_factor)
        return self._scale(img, factor)

    def preprocess(self, img, target_size=None):
        """Run the configured preprocessing steps on ``img``.

        Steps, each enabled by its ``preprocess.*`` setting: invert, crop,
        scale and padding. With ``target_size`` given, the result is then
        zero-padded to the target size enlarged by the configured padding.

        Returns:
            The processed image with a channel axis, or ``None`` when the
            image is empty or cropping/scaling yields nothing.
        """
        bg = 255
        if self.config.default('preprocess.invert', False):
            img = invert._invert(img)
            bg = 255 - bg

        if self.config.default('preprocess.crop', False):
            if img.shape[0] == 0 or img.shape[1] == 0:
                return None
            img = crop._crop(img)
            if img is None:
                return None

        if self.config.default('preprocess.scale', False):
            img = self._scale_img(
                img, self.config['preprocess.scale'], target_size)
            if img is None:
                return None

        if self.config.default('preprocess.padding', False):
            img = padding._pad_cv2(img, self.config['preprocess.padding'], bg)
        img = self.add_graychannel(img)
        if target_size is not None:
            border = self.config.default('preprocess.padding', 0) * 2
            target_size = (
                target_size[0] + border,
                target_size[1] + border,
                1
            )
            img = self._pad(img, target_size)
        return img

    def postprocesss(self, img):
        """Binarize ``img`` when ``postprocess.binarize`` is enabled, else return it unchanged."""
        if self.config.default('postprocess.binarize', False):
            img = self.binarization(img)
        return img

    def _pad(self, array, reference_shape, offsets=None):
        """
        array: Array to be padded
        reference_shape: tuple of size of ndarray to create
        offsets: list of offsets (number of elements must be equal to the dimension of the array)
        will throw a ValueError if offsets is too big and the reference_shape cannot handle the offsets
        """
        if offsets is None:
            offsets = [0] * len(array.shape)
        # Create an array of zeros with the reference shape
        result = np.zeros(reference_shape)
        # Create a list of slices from offset to offset + shape in each dimension
        insertHere = [slice(offsets[dim], offsets[dim] + array.shape[dim])
                      for dim in range(array.ndim)]
        # Insert the array in the result at the specified offsets
        result[tuple(insertHere)] = array
        return result
class RegionDataset(Dataset):
    """In-memory dataset built from the image crops of a list of regions.

    Vocabulary and image metadata are loaded from JSON files in
    ``model_path``. Batches contain only image data; label, length and path
    slots are returned as empty lists.
    """

    def __init__(self, regions, model_path, data_config=None):
        """Load vocabulary and metadata, then preprocess the given regions.

        Args:
            regions: Regions whose ``img`` attribute holds the crop, or
                ``None`` to defer loading until ``set_regions``.
            model_path: Directory containing the ``vocab`` and ``data_meta``
                JSON files.
            data_config: Optional preprocessing settings. ``None`` means no
                overrides.
        """
        self.model_path = model_path
        self._load_vocab()
        self._load_meta()
        self._scaling = 1.0
        self._max_height = 10000
        self._max_width = 10000
        # The configuration and the augmenter must exist before the regions
        # are set, because loading the regions preprocesses every image.
        self.data_config = Configuration(
            {} if data_config is None else data_config, DEFAULT_DATACONFIG)
        self.augmenter = ImageAugmenter(self.data_config)
        self.set_regions(regions)

    def info(self):
        """Call the metadata configuration with the title 'Dataset Configuration'."""
        self.meta('Dataset Configuration')

    def scaling(self, scaling, max_height, max_width):
        """Set the preprocessing scale factor and remember the size limits."""
        self.augmenter.config['preprocess.scale'] = scaling
        self._max_height = max_height
        self._max_width = max_width

    def _load_meta(self):
        """Load the ``data_meta`` JSON from the model directory."""
        self.meta = Configuration(util.loadJson(self.model_path, "data_meta"))

    def _load_vocab(self):
        """Load the ``vocab`` JSON and record the size of ``vocab[0]``."""
        self.vocab = util.loadJson(self.model_path, "vocab")
        self.vocab_length = len(self.vocab[0])

    def _load_sets(self):
        """Preprocess all regions into ``self.data``, dropping ``None`` results."""
        images = [self._loadimage(region) for region in self.regions]
        self.data = np.asarray([img for img in images if img is not None])

    def _loadimage(self, region):
        """Return the preprocessed image of ``region`` with a channel axis.

        Empty crops, and crops that preprocessing rejects, are replaced by a
        zero image of the ``meta`` height and width. Crops with more than two
        dimensions are converted to grayscale first.
        """
        blank_shape = (self.meta["height"], self.meta["width"])
        if region.img.shape[0] == 0 or region.img.shape[1] == 0:
            img = np.zeros(blank_shape)
        elif len(region.img.shape) > 2:
            img = cv2.cvtColor(region.img, cv2.COLOR_BGR2GRAY)
        else:
            img = region.img
        padding = self.data_config.default('preprocess.padding', 0)
        target_size = (int(self.meta["height"] - (padding * 2)),
                       int(self.meta["width"] - (padding * 2)))
        img = self.augmenter.preprocess(img, target_size)
        if img is not None:
            img = self.augmenter.postprocesss(img)
        if img is None:
            img = np.zeros(blank_shape)
        return self.augmenter.add_graychannel(img)

    def set_regions(self, regions):
        """Replace the regions and, unless ``None``, rebuild ``self.data``."""
        self.regions = regions
        if regions is not None:
            self._load_sets()

    def compile(self, text):
        """Convert ``text`` to vocabulary indices, padded with -1 to ``max_length``.

        NOTE(review): ``max_length`` is not assigned anywhere in this class —
        confirm it is provided by the base class or the caller.
        """
        parsed = [self.vocab[1][c] for c in text]
        parsed.extend([-1] * (self.max_length - len(text)))
        return parsed

    def decompile(self, values):
        """Convert indices back to text; indices missing from the vocabulary are dropped."""
        def getKey(key):
            try:
                return self.vocab[0][str(key)]
            except KeyError:
                return ''

        return ''.join([getKey(c) for c in values])

    def _load_batch(self, index, batch_size, dataset, with_filepath=False):
        """Return batch number ``index`` followed by empty label/length(/path) lists."""
        first = index * batch_size
        last = min((index + 1) * batch_size, len(self.data))
        batch_data = np.asarray(self.data[first:last])
        if with_filepath:
            return batch_data, [], [], []
        return batch_data, [], []

    def generateBatch(self,
                      batch_size=0,
                      max_batches=0,
                      dataset="",
                      with_filepath=False):
        """Yield all batches of the loaded data; ``dataset`` is ignored."""
        num_batches = self.getBatchCount(batch_size, max_batches, "")
        for b in range(num_batches):
            yield self._load_batch(b, batch_size, "", with_filepath)

    def generateEpochs(self,
                       batch_size,
                       num_epochs,
                       max_batches=0,
                       dataset="train",
                       with_filepath=False):
        """Return a list holding a single batch generator.

        ``num_epochs`` is not used. The remaining arguments are forwarded so
        that the generator is created with the requested batch size instead
        of the default of 0, which would divide by zero.
        """
        return [
            self.generateBatch(batch_size, max_batches, dataset,
                               with_filepath)
        ]

    def getBatchCount(self, batch_size, max_batches=0, dataset=""):
        """Return the number of batches needed for all data.

        ``max_batches`` and ``dataset`` are not used.
        """
        return int(np.ceil(len(self.data) / float(batch_size)))
Beispiel #10
0
class PrintGenerator(object):
    """Render text into a grayscale PIL image with randomised styling.

    Settings are read through ``self[key]``: the user ``config`` is consulted
    first and the module-level ``DEFAULTS`` supply the fallback value.
    """

    # Filter ``type`` name -> callable ``(image, filter_config)`` returning the
    # filtered image.
    FILTERS = {
        'blur':
        lambda i, c: i.filter(ImageFilter.GaussianBlur(c['radius'])),
        'sharpen':
        lambda i, c: i.filter(
            ImageFilter.UnsharpMask(c['radius'], c['percent'], c['threshold'])
        ),
        'warp':
        lambda i, c: _warp(i, c['grid'], c['deviation']),
        'affine':
        lambda i, c: PrintGenerator._affine_filter(i, c['config'])
    }

    def __init__(self, config=None):
        """Create a generator.

        Args:
            config: optional mapping of overrides handed to ``Configuration``;
                ``None`` means "no overrides".
        """
        # A fresh dict per instance avoids sharing one mutable default.
        self.config = Configuration({} if config is None else config)
        self.default = Configuration(DEFAULTS)
        # Element-wise maximum of all image sizes rendered so far.
        self.max_size = (0, 0)
        # -1 disables the respective limit in _iterate_height.
        self.max_height = -1
        self.max_width = -1

    def __getitem__(self, key):
        """Look ``key`` up in the user config, falling back to the defaults."""
        default = self.default.default(key, None)
        return self.config.default(key, default)

    def _random_font(self):
        """Pick one entry of the configured ``fonts`` uniformly at random."""
        fonts = self['fonts']
        return fonts[np.random.randint(0, len(fonts))]

    def _random_height(self):
        """Draw a font height from a normal distribution and clamp it to the
        configured ``height.min`` / ``height.max`` bounds."""
        # NOTE(review): these lookups hand a ``(key, True)`` tuple to
        # ``__getitem__``; confirm ``Configuration.default`` accepts that.
        sampled = int(
            np.random.normal(self['height.center', True],
                             self['height.scale', True]))
        return max(min(sampled, self['height.max', True]),
                   self['height.min', True])

    def _random_foreground(self):
        """Return a foreground gray value between the configured bounds."""
        low = self['foreground.low']
        if low == self['foreground.high']:
            return low
        # numpy's randint excludes the upper bound.
        return np.random.randint(self['foreground.low'],
                                 self['foreground.high'])

    def _iterate_height(self, text, fontname, height):
        """Load ``fontname`` and shrink it until ``text`` fits the limits.

        The font size is scaled down recursively while the padded text image
        would exceed ``max_height`` or ``max_width`` (when those are set).

        Returns:
            ``(font, offset, image_size)`` for the accepted font size.
        """
        font = ImageFont.truetype(fontname, size=height)
        size, offset = font.font.getsize(text)
        padding = self['printing_padding'] * 2
        image_size = (size[0] + offset[0] + padding,
                      size[1] + offset[1] + padding)
        if self.max_height > -1 and image_size[1] > self.max_height:
            height = int(height * (self.max_height / float(image_size[1])))
            return self._iterate_height(text, fontname, height)
        if self.max_width > -1 and image_size[0] > self.max_width:
            height = int(height * (self.max_width / float(image_size[0])))
            return self._iterate_height(text, fontname, height)
        return font, offset, image_size

    def _create_text_image(self, text, font, height, background, foreground):
        """Draw ``text`` onto a new single-channel image.

        Also updates ``self.max_size`` with the size of the created image.
        """
        font, offset, image_size = self._iterate_height(text, font, height)
        self.max_size = np.max([self.max_size, image_size], axis=0)
        image = Image.new("L", image_size, background)
        draw = ImageDraw.Draw(image)

        position = (self['printing_padding'],
                    -(offset[1] / 2) + self['printing_padding'])
        draw.text(position, text, font=font, fill=foreground)
        return image

    def _apply_filter(self, image, filter_config):
        """Apply one configured filter with probability ``prob``."""
        if filter_config['prob'] > np.random.rand():
            image = self.FILTERS[filter_config['type']](image, filter_config)
        return image

    def _apply_filters(self, image):
        """Run ``image`` through every entry of the ``filters`` setting."""
        for _filter in self['filters']:
            image = self._apply_filter(image, _filter)
        return image

    def _crop(self, image, invert):
        """Crop ``image`` if the ``crop`` setting is enabled.

        When ``invert`` is false the image is inverted before the crop helper
        runs and inverted back afterwards. ``None`` is passed through.
        """
        if image is None:
            return None
        if self['crop']:
            image = _pil2cv2(image)
            if not invert:
                image = 255 - image
            image = _crop(image)
            if not invert:
                image = 255 - image
            image = _cv2pil(image)
        return image

    def _pad(self, image, background):
        """Pad ``image`` with ``background`` when ``padding`` is positive."""
        if self['padding'] > 0:
            image = _pad_pil(image, self['padding'], background)
        return image

    def __call__(self, text, invert=False):
        """Render ``text`` and return the resulting image.

        Args:
            text: the string to draw.
            invert: when true both foreground and background gray values are
                replaced by ``255 - value``.

        Returns:
            The filtered, cropped and padded image, or ``None`` when cropping
            yields no image.
        """
        foreground = self._random_foreground()
        background = self['background']
        if invert:
            foreground = 255 - foreground
            background = 255 - background
        font = self._random_font()
        height = self._random_height()
        image = self._create_text_image(text, font, height, background,
                                        foreground)
        image = self._apply_filters(image)
        image = self._crop(image, invert)
        if image is None:
            return None
        # Pad with the actual background colour, not the ``invert`` flag.
        return self._pad(image, background)

    @staticmethod
    def clean_text(text):
        """Remove ``PUNCTUATION_REGEX`` matches, then replace
        ``REGULAR_REGEX`` matches with a single space."""
        text = PUNCTUATION_REGEX.sub('', text)
        text = REGULAR_REGEX.sub(' ', text)
        return text

    @staticmethod
    def _affine_filter(image, config):
        """Apply the affine helper to a PIL image via an OpenCV round trip."""
        image = _pil2cv2(image)
        image = _affine(image, config)
        return _cv2pil(image)
Beispiel #11
0
class PageHandwritingBlender(object):
    """Blend handwriting line images into a page image.

    Alongside the page, a ``truth`` array of the same shape is kept; it starts
    filled with the background value and receives the same inserted lines.
    """

    # Fallback settings; user configuration is consulted first.
    DEFAULTS = Configuration({
        "background": 255,
        "augmentation": {
            "line": {
                "scale": {"prob": 1.0, "center": -.25, "stdv": 0.15}
            },
            "page": [
                {'type': 'blur', 'prob': 0.5, 'kernel': (3, 3), 'sigma': 1},
                {'type': 'sharpen', 'prob': 0.5, 'kernel': (3, 3),
                 'sigma': 1},
                {
                    'type': 'warp',
                    'prob': 0.5,
                    'config': {'deviation': 2.7, 'gridsize': [100, 30]}
                },
            ]
        },
        # Filter ``type`` name -> callable ``(image, filter_config)``.
        "filters": {
            'blur':
            lambda i, c: cv2.GaussianBlur(i, c['kernel'], c['sigma']),
            'sharpen':
            lambda i, c: PageHandwritingBlender._unsharp_mask_filter(
                i, c['kernel'], c['sigma']),
            'warp':
            lambda i, c: PageHandwritingBlender._warp_filter(i, c['config']),
            'affine':
            lambda i, c: PageHandwritingBlender._affine_filter(i, c['config'])
        }
    })

    # ---- public API -------------------------------------------------------

    def __init__(self, page, config={}):
        """Keep ``page``, create the truth array and augment the page once."""
        self.page = page
        self.config = Configuration(config)
        self.truth = np.full(page.shape, self['background'])
        self._augment_page()

    def __call__(self, line):
        """Augment ``line`` and insert it at a random page position."""
        augmented = self._augment_line(line)
        line_h, line_w, _ = augmented.shape
        x, y = self._random_position(line_h, line_w)
        self._insert(augmented, x, y)

    def save(self, pagefile, truthfile):
        """Write the page and the truth array to the given file paths."""
        cv2.imwrite(pagefile, self.page)
        cv2.imwrite(truthfile, self.truth)

    def __getitem__(self, key):
        """Return the setting ``key``, preferring the instance config over
        ``DEFAULTS``."""
        fallback = self.DEFAULTS.default(key, None)
        return self.config.default(key, fallback)

    # ---- internals --------------------------------------------------------

    def _random_position(self, h, w):
        """Pick a uniformly random ``(x, y)`` for a line of size ``h`` x ``w``
        inside the page."""
        page_h, page_w, _ = self.page.shape
        x = int(np.random.uniform(0, page_w - w))
        y = int(np.random.uniform(0, page_h - h))
        return x, y

    def _insert(self, line, x, y):
        """AND ``line`` into both the page and the truth array.

        The offset is pulled back so the line does not run past the right or
        bottom edge of the page.
        """
        page_h, page_w, _ = self.page.shape
        line_h, line_w, _ = line.shape
        left = min(x, page_w - line_w)
        top = min(y, page_h - line_h)
        rows = slice(top, top + line_h)
        cols = slice(left, left + line_w)
        self.page[rows, cols, :] &= line
        self.truth[rows, cols, :] &= line

    def _augment_line(self, line):
        """Threshold ``line`` and run the configured affine augmentation."""
        gray = cv2.cvtColor(line, cv2.COLOR_BGR2GRAY)
        binary = _threshold(gray, False)
        colour = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
        transform = AffineTransformation(colour)
        transform.configure(self['augmentation.line'])
        return transform(background=[self['background']] * 3)

    def _augment_page(self):
        """Apply each configured page filter with its own probability."""
        for step in self['augmentation.page']:
            if step['prob'] > np.random.rand():
                apply_filter = self['filters'][step['type']]
                self.page = apply_filter(self.page, step)

    # ---- filter implementations -------------------------------------------

    @staticmethod
    def _affine_filter(image, config):
        """Run an ``AffineTransformation`` configured with ``config``."""
        transform = AffineTransformation(image)
        transform.configure(config)
        return transform()

    @staticmethod
    def _warp_filter(image, config):
        """Warp ``image`` through a PIL round trip using ``config``."""
        pil_image = _cv2pil(image, 'RGB')
        warped = _warp(pil_image, config['gridsize'], config['deviation'])
        return _pil2cv2(warped, 'RGB')

    @staticmethod
    def _unsharp_mask_filter(image, kernel, sigma):
        """Sharpen ``image`` by subtracting half of its Gaussian blur.

        ``image`` is also passed as the destination argument of
        ``cv2.addWeighted``.
        """
        blurred = cv2.GaussianBlur(image, kernel, sigma)
        return cv2.addWeighted(image, 1.5, blurred, -0.5, 0, image)
Beispiel #12
0
class PreparedDataset(Dataset):
    """Dataset read from prepared JSON files below ``util.OUTPUT_PATH/<name>``.

    The directory provides ``vocab``, ``meta`` and the ``train`` / ``dev`` /
    ``test`` splits; ``print_*`` splits are loaded as well when the meta data
    sets ``printed``.
    """

    def __init__(self, name, transpose=True, data_config=None):
        """Load vocabulary, meta data and all splits of dataset ``name``.

        Args:
            name: directory name below ``util.OUTPUT_PATH``.
            transpose: stored on the instance and handed to the line loaders.
            data_config: optional mapping wrapped in ``Configuration``;
                ``None`` means an empty configuration.
        """
        self.name = name
        # A fresh dict per instance avoids sharing one mutable default.
        self.data_config = Configuration(
            {} if data_config is None else data_config)
        self.min_width_factor = 15
        self.max_min_width = 400
        self.datapath = os.path.join(util.OUTPUT_PATH, name)
        self._load_vocab()
        self._load_meta()
        self._load_sets()
        self._calc_max_length()
        self._compile_sets()
        self.transpose = transpose
        self.channels = 1
        self._fill_meta()
        self.augmenter = ImageAugmenter(self.data_config)
        # Original, unfiltered splits keyed by subset name; filled lazily by
        # _filter_by_type.
        self.unfiltered = {}

    def load_vocab(self, path):
        """Replace the vocabulary with the one at ``path`` and recompile."""
        self._load_vocab(path)
        self._compile_sets()
        self._fill_meta()

    def info(self):
        """Invoke the meta configuration with the title
        'Dataset Configuration'."""
        self.meta('Dataset Configuration')

    def _load_meta(self):
        """Read ``meta`` from the dataset directory."""
        self.meta = Configuration(util.loadJson(self.datapath, "meta"))

    def _load_vocab(self, path=None):
        """Read ``vocab`` from ``path`` (default: the dataset directory)."""
        path = path or self.datapath
        self.vocab = util.loadJson(path, "vocab")
        self.vocab_length = len(self.vocab[0])

    def _fill_meta(self):
        """Publish the vocabulary size and per-split counts in ``meta``."""
        self.meta['vocab.size'] = self.vocab_length
        for subset in ('train', 'dev', 'test'):
            self.meta[subset + '.count'] = len(self.data[subset])
        if 'print_train' in self.data:
            for subset in ('print_train', 'print_dev', 'print_test'):
                self.meta[subset + '.count'] = len(self.data[subset])

    def _load_sets(self):
        """Load all splits and optionally sort them by image width."""
        printed = self.meta.default('printed', False)
        self.data = {
            "train": util.loadJson(self.datapath, "train"),
            "dev": util.loadJson(self.datapath, "dev"),
            "test": util.loadJson(self.datapath, "test")
        }
        if printed:
            self.data['print_train'] = util.loadJson(self.datapath,
                                                     "print_train")
            self.data['print_dev'] = util.loadJson(self.datapath, "print_dev")
            self.data['print_test'] = util.loadJson(self.datapath,
                                                    "print_test")
        if self.data_config.default('sort_by_width', False):
            # Only the train and dev splits are sorted; test splits keep
            # their stored order.
            self._sort_by_width("train")
            self._sort_by_width("dev")
            if printed:
                self._sort_by_width("print_train")
                self._sort_by_width("print_dev")

    def _sort_by_width(self, dataset):
        """Sort ``dataset`` by image width, widest first.

        Every image is read once and its width is stored on the datapoint
        under the ``width`` key.
        """
        print("Sorting {} dataset by width...".format(dataset))
        for datapoint in self.data[dataset]:
            img = cv2.imread(datapoint["path"], cv2.IMREAD_GRAYSCALE)
            datapoint["width"] = img.shape[1]
        self.data[dataset].sort(key=lambda x: x["width"], reverse=True)

    def _compile_set(self, dataset):
        """Store the encoded ``truth`` of every item under ``compiled``."""
        for item in self.data[dataset]:
            item['compiled'] = self.compile(item['truth'])

    def _filter_by_type(self, subset):
        """Subsample ``subset`` according to ``type_probs``.

        Items whose ``type`` has a configured probability are kept with that
        probability; all other items are always kept. Sampling always starts
        from the original list remembered in ``self.unfiltered``.
        """
        if subset not in self.unfiltered:
            self.unfiltered[subset] = self.data[subset]
        type_probs = self.data_config['type_probs']
        filtered = []
        for entry in self.unfiltered[subset]:
            if entry['type'] in type_probs:
                if np.random.uniform() <= type_probs[entry['type']]:
                    filtered.append(entry)
            else:
                filtered.append(entry)
        self.data[subset] = filtered

    def _filter_data(self):
        """Re-sample the training splits when ``type_probs`` is configured."""
        if self.data_config.default('type_probs', False):
            self._filter_by_type('train')
            if self.meta.default('printed', False):
                self._filter_by_type('print_train')

    def _compile_sets(self):
        """Encode the ground truth of the train, dev and test splits."""
        self._compile_set("train")
        self._compile_set("dev")
        self._compile_set("test")

    def _calc_max_length(self):
        """Set ``max_length`` to the longest ``truth`` of train/test/dev."""
        _all = []
        _all.extend(self.data["train"])
        _all.extend(self.data["test"])
        _all.extend(self.data["dev"])
        self.max_length = max(len(item["truth"]) for item in _all)

    def compile(self, text):
        """Encode ``text`` as vocabulary indices, right-padded with ``-1``
        up to ``max_length``.

        Raises:
            KeyError: if a character is missing from the vocabulary.
        """
        parsed = [self.vocab[1][c] for c in text]
        parsed.extend([-1] * (self.max_length - len(text)))
        return parsed

    def decompile(self, values):
        """Turn vocabulary indices back into a string; unknown indices
        (such as the ``-1`` padding) are dropped."""
        def getKey(key):
            try:
                return self.vocab[0][str(key)]
            except KeyError:
                return ''

        return ''.join(getKey(c) for c in values)

    def load_image(self, path, transpose=False, augmentable=False):
        """Read the grayscale image at ``path`` and bring it to dataset size.

        Args:
            path: image file to read.
            transpose: swap the two image axes before sizing.
            augmentable: allow on-the-fly augmentation when the
                ``otf_augmentations`` setting is enabled.

        Returns:
            The image after ``add_graychannel``; padded to the meta
            ``width`` / ``height`` unless ``dynamic_width`` is enabled.
            ``None`` when the transpose step raises ``ValueError``.
        """
        x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if self.data_config.default("otf_augmentations",
                                    False) and augmentable:
            x = self.augmenter.augment(x)
        else:
            x = self.augmenter.add_graychannel(x)
        if transpose:
            try:
                x = np.transpose(x, [1, 0])
                if self.data_config.default('dynamic_width', False):
                    return self.augmenter.add_graychannel(x)
            except ValueError:
                # Signal "no image" with a single None so that callers can
                # skip the sample (previously a 4-tuple leaked out here).
                return None
            width_axis, height_axis = 0, 1
        else:
            if self.data_config.default('dynamic_width', False):
                return self.augmenter.add_graychannel(x)
            width_axis, height_axis = 1, 0
        if (x.shape[width_axis] != self.meta["width"]
                or x.shape[height_axis] != self.meta["height"]):
            x = self.augmenter.pad_to_size(x,
                                           width=self.meta["width"],
                                           height=self.meta["height"])
        return self.augmenter.add_graychannel(x)

    def _loadline(self, line, transpose=True, augmentable=False):
        """Load one handwriting sample.

        Returns:
            ``(image, compiled_truth, truth_length, path)``, or four ``None``
            values when the image could not be loaded.
        """
        # NOTE(review): ``transpose`` is accepted but not forwarded to
        # load_image; confirm whether that is intended.
        l = len(line["truth"])
        y = np.asarray(line["compiled"])
        x = self.load_image(line["path"], augmentable=augmentable)
        if x is None:
            return None, None, None, None
        return self.augmenter.postprocesss(x), y, l, line["path"]

    def _loadprintline(self, line, transpose=True, augmentable=False):
        """Load one printed sample.

        Returns:
            ``(image, [truth], 0, path)``, or four ``None`` values when the
            image could not be loaded.
        """
        y = line["truth"]
        x = self.load_image(line["path"], augmentable=augmentable)
        if x is None:
            return None, None, None, None
        return self.augmenter.postprocesss(x), [y], 0, line["path"]

    def _load_batch(self,
                    index,
                    batch_size,
                    dataset,
                    with_filepath=False,
                    augmentable=False):
        """Load batch number ``index`` of ``dataset``.

        Samples whose image is ``None`` are skipped. With ``dynamic_width``
        enabled the images are placed into one zero-initialised array sized
        to the widest image of the batch.

        Returns:
            ``(X, Y, L)`` or, with ``with_filepath``, ``(X, Y, L, F)``.
        """
        X = []
        Y = []
        L = []
        F = []

        parseline = (self._loadprintline
                     if dataset.startswith("print_") else self._loadline)
        first = index * batch_size
        last = min((index + 1) * batch_size, len(self.data[dataset]))
        for idx in range(first, last):
            x, y, l, f = parseline(self.data[dataset][idx],
                                   self.transpose,
                                   augmentable=augmentable)
            if x is not None:
                X.append(x)
                Y.append(y)
                L.append(l)
                F.append(f)
        if self.data_config.default('dynamic_width', False):
            # NOTE(review): lengths are offset by 5 in this mode; the reason
            # is not visible here.
            L = np.asarray(L) + 5
            batch_width = np.max([img.shape[1] for img in X])
            if batch_width < self.max_min_width:
                batch_width = max(batch_width,
                                  np.max(L) * self.min_width_factor)
            X_ = np.zeros((len(X), self.meta["height"], batch_width, 1),
                          dtype=np.int32)
            for pos, img in enumerate(X):
                X_[pos, 0:img.shape[0], 0:img.shape[1], :] = img
            X = X_
            Y = np.asarray(Y)
        else:
            X = np.asarray(X)
            Y = np.asarray(Y)
            L = np.asarray(L)
        if with_filepath:
            return X, Y, L, F
        return X, Y, L

    def before_epoch(self, subset):
        """Shuffle ``subset`` (if ``shuffle_epoch`` is set) and re-filter."""
        if self.data_config.default('shuffle_epoch', False):
            # Shuffle the unfiltered source list when one exists, because the
            # filtered list is rebuilt from it right below.
            if subset in self.unfiltered:
                shuffle(self.unfiltered[subset])
            else:
                shuffle(self.data[subset])
        self._filter_data()

    def generateBatch(self,
                      batch_size,
                      max_batches=0,
                      dataset="train",
                      with_filepath=False,
                      augmentable=False):
        """Yield the batches of ``dataset`` in order (see ``_load_batch``)."""
        num_batches = self.getBatchCount(batch_size, max_batches, dataset)
        for b in range(num_batches):
            yield self._load_batch(b,
                                   batch_size,
                                   dataset,
                                   with_filepath,
                                   augmentable=augmentable)

    # deprecated
    def generateEpochs(self,
                       batch_size,
                       num_epochs,
                       max_batches=0,
                       dataset="train",
                       with_filepath=False,
                       augmentable=False):
        """Yield one batch generator per epoch, ``num_epochs`` times."""
        for e in range(num_epochs):
            yield self.generateBatch(batch_size,
                                     max_batches=max_batches,
                                     dataset=dataset,
                                     with_filepath=with_filepath,
                                     augmentable=augmentable)

    def getBatchCount(self, batch_size, max_batches=0, dataset="train"):
        """Return the number of batches in ``dataset``, capped at
        ``max_batches`` when that is positive."""
        total_len = len(self.data[dataset])
        num_batches = int(math.ceil(float(total_len) / batch_size))
        if max_batches > 0:
            return min(num_batches, max_batches)
        return num_batches
class Extendable(object):
    """Lazily builds and caches TensorFlow decoding and metric tensors.

    Every ``build_*`` method creates its tensor on the first call and returns
    the cached tensor afterwards. For the segmentation metrics class index 0
    is counted as the positive class and index 1 as the negative class.
    """

    # Caches for the build_* methods; None means "not built yet".
    _decoded_dense = None
    _decoder = None
    _pred_thresholded = None
    _cer = None
    _accuracy = None

    _tp = None
    _tn = None
    _fn = None
    _fp = None
    _pred_res = None
    _y_res = None

    _sep_acc = None
    _sep_prec = None
    _sep_rec = None
    _sep_f = None

    def __init__(self, **kwargs):
        """Wrap the optional ``config`` keyword in a ``Configuration``."""
        self.config = Configuration(kwargs.get('config', {}))

    def build_decoded_dense(self, graph):
        """Return the first decoder output as a dense tensor padded with -1."""
        if self._decoded_dense is None:
            decoded = self.build_decoder(graph)
            self._decoded_dense = tf.sparse_to_dense(decoded[0].indices,
                                                     decoded[0].dense_shape,
                                                     decoded[0].values,
                                                     tf.constant(-1, tf.int64))
        return self._decoded_dense

    def build_decoder(self, graph):
        """Return the CTC decoder output for ``graph['logits']``.

        ``config['ctc'] == "greedy"`` selects the greedy decoder, any other
        truthy value the beam search decoder. With a falsy setting nothing is
        built and ``None`` is returned.
        """
        if self._decoder is None:
            if self.config['ctc'] == "greedy":
                self._decoder, _ = tf.nn.ctc_greedy_decoder(
                    graph['logits'], graph['l'], merge_repeated=True)
            elif self.config['ctc']:
                self._decoder, _ = tf.nn.ctc_beam_search_decoder(
                    graph['logits'], graph['l'], merge_repeated=True)
        return self._decoder

    def build_pred_thresholding(self, graph):
        """Return ``graph['class_pred']`` thresholded to int32 0/1 values.

        The threshold is the ``accuracy_threshold`` setting (default 0.5).
        """
        if self._pred_thresholded is None:
            self._pred_thresholded = tf.to_int32(
                graph['class_pred'] > self.config.default(
                    'accuracy_threshold', 0.5))
        return self._pred_thresholded

    def build_cer(self, graph):
        """Return the edit distance between decoder output and
        ``graph['y']``."""
        if self._cer is None:
            decoded = self.build_decoder(graph)
            self._cer = tf.edit_distance(tf.cast(decoded[0], tf.int32),
                                         tf.cast(graph['y'], tf.int32))
        return self._cer

    def build_accuracy(self, graph):
        """Return the mean agreement of the thresholded predictions with
        ``graph['class_y']``."""
        if self._accuracy is None:
            predictions = self.build_pred_thresholding(graph)
            equality = tf.equal(predictions, tf.cast(graph['class_y'],
                                                     tf.int32))
            self._accuracy = tf.reduce_mean(tf.cast(equality, tf.float32))
        return self._accuracy

    def _count_matches(self, graph, truth_class, predicted_class):
        """Count positions labelled ``truth_class`` in the ground truth whose
        prediction is ``predicted_class``."""
        pred_res = self.build_pred_res(graph)
        y_res = self.build_y_res(graph)
        selected = tf.boolean_mask(pred_res, tf.equal(y_res, truth_class))
        return tf.reduce_sum(
            tf.cast(tf.equal(selected, predicted_class), tf.float32))

    def build_tp(self, graph):
        """Return the number of true positives (truth 0, predicted 0)."""
        if self._tp is None:
            self._tp = self._count_matches(graph, 0, 0)
        return self._tp

    def build_fp(self, graph):
        """Return the number of false positives (truth 1, predicted 0)."""
        if self._fp is None:
            self._fp = self._count_matches(graph, 1, 0)
        return self._fp

    def build_fn(self, graph):
        """Return the number of false negatives (truth 0, predicted 1)."""
        if self._fn is None:
            self._fn = self._count_matches(graph, 0, 1)
        return self._fn

    def build_tn(self, graph):
        """Return the number of true negatives (truth 1, predicted 1)."""
        if self._tn is None:
            # Reduced to a scalar count exactly like tp / fp / fn.
            self._tn = self._count_matches(graph, 1, 1)
        return self._tn

    def build_sep_accuracy(self, graph):
        """Return the fraction of positions where prediction and truth
        agree."""
        if self._sep_acc is None:
            pred_res = self.build_pred_res(graph)
            y_res = self.build_y_res(graph)
            self._sep_acc = tf.reduce_mean(
                tf.cast(tf.equal(pred_res, y_res), tf.float32))
        return self._sep_acc

    def build_sep_recall(self, graph):
        """Return ``tp / (tp + fn)``."""
        if self._sep_rec is None:
            tp = self.build_tp(graph)
            fn = self.build_fn(graph)
            self._sep_rec = tp / (tp + fn)
        return self._sep_rec

    def build_sep_precision(self, graph):
        """Return ``tp / (tp + fp)``."""
        if self._sep_prec is None:
            tp = self.build_tp(graph)
            fp = self.build_fp(graph)
            self._sep_prec = tp / (tp + fp)
        return self._sep_prec

    def build_sep_fmeasure(self, graph):
        """Return ``2 * precision * recall / (precision + recall)``."""
        if self._sep_f is None:
            prec = self.build_sep_precision(graph)
            rec = self.build_sep_recall(graph)
            self._sep_f = (tf.constant(2.0) * prec * rec) / (prec + rec)
        return self._sep_f

    def build_pred_res(self, graph):
        """Return the argmax of ``graph['output']`` along axis 3."""
        if self._pred_res is None:
            self._pred_res = tf.argmax(graph['output'], 3)
        return self._pred_res

    def build_y_res(self, graph):
        """Return the argmax of ``graph['y']`` along axis 3."""
        if self._y_res is None:
            self._y_res = tf.argmax(graph['y'], 3)
        return self._y_res