def __call__(self, batch):
        """Collate ``(image, label)`` pairs into one image tensor plus labels.

        When ``self.keep_ratio`` is set, every image is passed through
        ``image_resize`` with height ``nheight - 16`` and width ``nwidth``,
        where ``nheight`` / ``nwidth`` default to ``self.imgH`` / ``self.imgW``.
        A ``None`` height falls back to the image's own ``shape[0]``; a ``None``
        width is derived from the image's aspect ratio, scaled by a random
        factor drawn from ``[0.8, 1.2)``.  Each resized image gets a new
        leading axis before being collected.

        NOTE(review): when ``self.keep_ratio`` is false nothing is collected
        and the returned tensor is empty -- confirm this is intended.

        Args:
            batch: iterable of ``(image, label)`` pairs.  Images are read via
                ``.shape[0]`` / ``.shape[1]`` (presumably height / width of a
                NumPy array -- confirm against the dataset).

        Returns:
            A ``(images, labels)`` tuple: ``images`` is a ``Variable`` wrapping
            a ``torch.Tensor`` built from the collected images, ``labels`` is
            the tuple of labels in batch order.
        """
        images, labels = zip(*batch)
        new_images = []
        if self.keep_ratio:
            for img in images:
                nheight = self.imgH
                nwidth = self.imgW
                if nheight is None:
                    nheight = img.shape[0]
                if nwidth is None:
                    # Keep the aspect ratio, jittered by a random factor.
                    nwidth = int(
                        np.random.uniform(.8, 1.2) * img.shape[1] * nheight /
                        img.shape[0])

                img = image_resize(img, height=nheight - 16, width=nwidth)
                # The former debug dump of every image to a hard-coded
                # absolute path was removed: it performed disk I/O per sample
                # and only worked on the original author's machine.
                new_images.append(img[np.newaxis, ...])

        # Convert to a single ndarray first; building a tensor directly from
        # a Python list of ndarrays is very slow.
        return Variable(torch.Tensor(np.asarray(new_images))), labels
Beispiel #2
0
    def __getitem__(self, index):
        """Return ``(image_tensor, transcription)`` for the sample at ``index``.

        The transcription is wrapped in one leading and one trailing space.
        The image is resized (with random scale jitter when ``self.set`` is
        ``'train'``), clamped so it fits inside ``self.fixed_size`` with a
        margin, centred on a ``fixed_size`` canvas with border value ``0.0``,
        optionally passed through each entry of ``self.transforms`` with
        probability 0.5, and finally converted to a float tensor with a new
        leading axis.
        """
        img = self.data[index][0]
        transcr = " " + self.data[index][1] + " "

        canvas_h, canvas_w = self.fixed_size[0], self.fixed_size[1]

        src_h, src_w = img.shape[0], img.shape[1]
        if self.set == 'train':
            # Random resize at training time: jitter the width first, then
            # derive the height from a jittered aspect ratio.
            new_w = int(np.random.uniform(.75, 1.25) * src_w)
            jittered_aspect = np.random.uniform(.9, 1.1) * src_h / src_w
            new_h = int(jittered_aspect * new_w)
        else:
            new_h, new_w = src_h, src_w

        # Leave small pads: at most (canvas - margin), at least a few pixels.
        new_h = max(4, min(canvas_h - 32, new_h))
        new_w = max(8, min(canvas_w - 64, new_w))

        img = image_resize(img, height=int(new_h), width=int(new_w))
        img = centered(img, (canvas_h, canvas_w), border_value=0.0)

        transforms = self.transforms
        if transforms is not None:
            for transform in transforms:
                # Each transform is applied independently with probability .5.
                if np.random.rand() >= .5:
                    continue
                img = transform(img)

        tensor = torch.Tensor(img).float().unsqueeze(0)
        return tensor, transcr
    def __getitem__(self, index):
        """Return an augmented ``(image, transcription)`` pair for ``index``.

        The image (squeezed of singleton axes) is rotated by a random integer
        angle in ``[-2, 2]``, corrupted with random noise and blurred with a
        Gaussian filter; noise and blur strength both scale with
        ``self.augment_factor``.  Only when ``self.set == 'test'`` is the
        result additionally resized to ``(nheight - 16, nwidth)``, where the
        target size comes from ``self.fixed_size`` (``None`` entries fall back
        to the image's own height / a randomly jittered aspect-preserving
        width).

        The image is returned as produced by the image helpers; it is not
        converted to a tensor here.
        """
        img = self.data[index][0].squeeze()
        transcr = self.data[index][1]

        nheight = self.fixed_size[0]
        nwidth = self.fixed_size[1]
        if nheight is None:
            nheight = img.shape[0]

        if nwidth is None:
            nwidth = int(np.random.uniform(.8, 1.2) * img.shape[1] * nheight / img.shape[0])

        # Augmentation strengths.
        noise = np.random.uniform(.1, 0.25) * self.augment_factor
        blur = np.random.uniform(.5, 2.0) * self.augment_factor

        # np.random.random_integers is deprecated; randint's upper bound is
        # exclusive, so (-2, 3) draws from the same inclusive range [-2, 2].
        angle = np.random.randint(-2, 3)
        img = rotate(img, angle=angle, mode='constant', cval=1, resize=True)

        img = random_noise(img, var=noise ** 2)
        # NOTE(review): multichannel=True treats the last axis as channels;
        # if img is 2-D after squeeze() this may not blur across columns, and
        # newer scikit-image releases replace the flag with channel_axis --
        # confirm against the installed version.
        img = gaussian(img, sigma=blur, multichannel=True)

        if self.set == 'test':
            img = image_resize(img, height=nheight-16, width=nwidth)

        return img, transcr
    def __getitem__(self, index):
        """Return ``(image, transcription)`` for the sample at ``index``.

        Samples are returned untouched unless ``self.set == 'test2'``, in
        which case the image is resized to ``(height - 16, width)``.  Height
        and width come from ``self.fixed_size``; a ``None`` height means the
        image's own height, and a ``None`` width means the width that keeps
        the aspect ratio at the chosen height.
        """
        img = self.data[index][0]
        transcr = self.data[index][1]

        if self.set != 'test2':
            return img, transcr

        target_h = self.fixed_size[0]
        target_w = self.fixed_size[1]
        if target_h is None:
            target_h = img.shape[0]
        if target_w is None:
            target_w = int(img.shape[1] * target_h / img.shape[0])

        resized = image_resize(img, height=target_h - 16, width=target_w)
        return resized, transcr
Beispiel #5
0
def main_loader(set, level):
    """Load every image/transcription pair listed by ``gather_iam_info``.

    Each image is read from ``<img_path>.png``, inverted and scaled via
    ``1 - pixel / 255`` as ``float32``, and resized to half its height with
    ``image_resize``.  ``"|"`` in a transcription is replaced by a space.
    Progress is printed every 1000 entries.

    Entries whose image cannot be loaded or processed are reported and
    skipped.  Previously execution fell through after a failure and stored
    the *previous* image under the current transcription (or raised
    ``NameError`` if the very first image failed).

    Args:
        set: forwarded to ``gather_iam_info`` (name shadows the builtin but
            is kept for interface compatibility).
        level: forwarded to ``gather_iam_info``.

    Returns:
        A list of ``(image, transcription)`` tuples for the entries that
        loaded successfully.
    """
    info = gather_iam_info(set, level)

    data = []
    for i, (img_path, transcr) in enumerate(info):

        if i % 1000 == 0:
            print('imgs: [{}/{} ({:.0f}%)]'.format(i, len(info),
                                                   100. * i / len(info)))

        try:
            img = img_io.imread(img_path + '.png')
            img = 1 - img.astype(np.float32) / 255.0
            img = image_resize(img, height=img.shape[0] // 2)
        except Exception:
            # Best-effort loading: report and skip the broken entry, but let
            # KeyboardInterrupt / SystemExit propagate.
            print('main_loader error: Could not load {}'.format(img_path))
            continue

        data.append((img, transcr.replace("|", " ")))

    return data
Beispiel #6
0
    def __getitem__(self, index):
        """Return ``(image_tensor, transcription)`` for the sample at ``index``.

        The image is resized to ``(height - 16, width)``, centred on a canvas
        of size ``(height, int(1.2 * width) + 32)`` and converted to a float
        tensor with a new leading axis.  Height and width come from
        ``self.fixed_size``; a ``None`` height means the image's own height,
        and a ``None`` width is derived from the aspect ratio, scaled by a
        random factor drawn from ``[0.8, 1.2)``.
        """
        img = self.data[index][0]
        transcr = self.data[index][1]

        target_h, target_w = self.fixed_size[0], self.fixed_size[1]
        if target_h is None:
            target_h = img.shape[0]
        if target_w is None:
            # Aspect-preserving width with random jitter.
            jitter = np.random.uniform(.8, 1.2)
            target_w = int(jitter * img.shape[1] * target_h / img.shape[0])

        canvas_size = (target_h, int(1.2 * target_w) + 32)
        resized = image_resize(img, height=target_h-16, width=target_w)
        padded = centered(resized, canvas_size)

        return torch.Tensor(padded).float().unsqueeze(0), transcr