def next_train_batch(self):
    """Yield training batches forever, restarting when the partition is used up.

    Every pass takes the next ``self.batch_size`` consecutive entries of
    ``self.dataset['train']`` starting at ``self.index['train']``; once that
    cursor has reached ``self.size['train']`` it is put back to 0 first.

    Yields:
        A tuple ``(x_train, y_train, [])`` where ``x_train`` is the image
        slice after ``pp.augmentation`` and ``pp.normalization``, and
        ``y_train`` is the ground-truth slice encoded by ``self.tokenizer``
        and post-padded by ``pad_sequences`` to ``self.tokenizer.maxlen``.
        The third element is always an empty list (presumably the
        sample-weight slot expected by the training loop -- confirm with
        the caller).
    """
    while True:
        # Start a new pass over the data once the cursor ran off the end.
        if self.index['train'] >= self.size['train']:
            self.index['train'] = 0

        start = self.index['train']
        stop = start + self.batch_size
        # Advance the cursor before the data is touched, as callers resume here.
        self.index['train'] = stop

        images = self.dataset['train']['dt'][start:stop]
        texts = self.dataset['train']['gt'][start:stop]

        images = pp.augmentation(
            images,
            rotation_range=1.5,
            scale_range=0.05,
            height_shift_range=0.025,
            width_shift_range=0.05,
            erode_range=5,
            dilate_range=3,
        )
        images = pp.normalization(images)

        encoded = [self.tokenizer.encode(text) for text in texts]
        labels = pad_sequences(encoded, maxlen=self.tokenizer.maxlen, padding="post")

        yield (images, labels, [])
def next_train_batch(self):
    """Yield ``(images, labels)`` training batches in an endless loop.

    The cursor ``self.index['train']`` selects ``self.batch_size`` consecutive
    entries of ``self.dataset['train']`` and is reset to 0 whenever it has
    reached ``self.size['train']``.

    Yields:
        A tuple ``(x_train, y_train)``: ``x_train`` is the image slice after
        ``pp.augmentation`` and ``pp.normalization``; ``y_train`` is an
        ``np.int16`` array built from the ground-truth slice, each entry
        encoded by ``self.tokenizer`` and right-padded with ``np.pad`` up to
        ``self.tokenizer.maxlen``.
    """
    while True:
        # Wrap around when the previous batch consumed the end of the partition.
        if self.index['train'] >= self.size['train']:
            self.index['train'] = 0

        start = self.index['train']
        stop = start + self.batch_size
        self.index['train'] = stop

        images = self.dataset['train']['dt'][start:stop]
        images = pp.augmentation(
            images,
            rotation_range=1.5,
            scale_range=0.05,
            height_shift_range=0.025,
            width_shift_range=0.05,
            erode_range=5,
            dilate_range=3,
        )
        images = pp.normalization(images)

        # Encode every label first, then pad them all to one common width.
        texts = self.dataset['train']['gt'][start:stop]
        encoded = [self.tokenizer.encode(text) for text in texts]

        width = self.tokenizer.maxlen
        padded = [np.pad(tokens, (0, width - len(tokens))) for tokens in encoded]
        labels = np.asarray(padded, dtype=np.int16)

        yield (images, labels)
def next_train_batch(self):
    """Yield CTC training batches forever, wrapping at the end of the partition.

    Each pass slices ``self.batch_size`` consecutive entries from
    ``self.dataset["train"]`` starting at ``self.train_index``; the cursor is
    reset to 0 once it has reached ``self.total_train``.

    The last slice of a pass over the data may contain fewer than
    ``self.batch_size`` samples. All per-sample arrays are therefore sized by
    the number of samples actually sliced, not by ``self.batch_size``
    (indexing ``range(self.batch_size)`` into a short slice raised
    ``IndexError``).

    Yields:
        A tuple ``(inputs, output)`` where ``inputs`` is a dict with keys
        ``"input"`` (augmented and normalized images), ``"labels"`` (the
        ground-truth slice as stored), ``"input_length"``
        (``self.max_text_length`` repeated once per sample) and
        ``"label_length"`` (length of each label after ``np.trim_zeros``),
        and ``output`` is ``{"CTCloss": zeros}`` with one zero per sample.
    """
    while True:
        # Restart from the beginning once the cursor has run off the end.
        if self.train_index >= self.total_train:
            self.train_index = 0

        index = self.train_index
        until = index + self.batch_size
        self.train_index += self.batch_size

        x_train = self.dataset["train"]["dt"][index:until]
        y_train = self.dataset["train"]["gt"][index:until]

        # Real number of samples in this batch; smaller than batch_size for
        # the final slice when total_train is not a multiple of batch_size.
        batch_len = len(y_train)

        x_train = pp.augmentation(
            x_train,
            rotation_range=1.5,
            scale_range=0.05,
            height_shift_range=0.025,
            width_shift_range=0.05,
            erode_range=5,
            dilate_range=3,
        )
        x_train = pp.normalization(x_train)

        x_train_len = np.asarray([self.max_text_length] * batch_len)
        # np.trim_zeros strips leading and trailing zeros; this presumably
        # removes zero padding from already-encoded labels -- TODO confirm.
        y_train_len = np.asarray([len(np.trim_zeros(label)) for label in y_train])

        inputs = {
            "input": x_train,
            "labels": y_train,
            "input_length": x_train_len,
            "label_length": y_train_len,
        }
        # One zero per sample, keyed "CTCloss", yielded as the target.
        output = {"CTCloss": np.zeros(batch_len)}

        yield (inputs, output)
def getNext(self, train=True):
    """Yield ``(imgs, gtTexts)`` batches from the selected image partition.

    Args:
        train: Stored on ``self.train``. A value equal to ``True`` selects
            ``self.img_partitions[0]``; anything else selects
            ``self.img_partitions[1]``.

    Yields:
        A tuple ``(imgs, gtTexts)``: ``imgs`` are the samples' image files
        (``filePath`` joined onto ``self.filePath``) run through
        ``pp.preprocess``, ``pp.augmentation`` and ``pp.normalization``;
        ``gtTexts`` is an ``np.int16`` array of the samples' ``gtText``
        encoded by ``self.tokenizer`` and right-padded to
        ``self.tokenizer.maxlen``.

    The batch covers ``self.currIdx`` up to ``self.currIdx + self.batchSize``,
    clamped to the partition length. Previously the end of the range was only
    clamped when ``currIdx`` was already past the partition, so a batch
    starting near the end indexed beyond the last sample and raised
    ``IndexError``.

    NOTE(review): this generator never advances ``self.currIdx``; unless the
    caller moves it between iterations the same batch is yielded every time
    -- confirm against the caller.
    NOTE(review): augmentation is applied for both partitions, including
    ``train=False`` -- confirm this is intended.
    """
    self.train = train
    # Kept as an equality test so non-bool arguments pick the same partition
    # they always did.
    j = 0 if self.train == True else 1  # noqa: E712

    while True:
        partition = self.img_partitions[j]
        index = self.currIdx
        # Never read past the last sample of the partition.
        until = min(index + self.batchSize, len(partition))
        samples = [partition[i] for i in range(index, until)]

        imgs = [
            pp.preprocess(os.path.join(self.filePath, sample.filePath), self.imgSize)
            for sample in samples
        ]
        imgs = pp.augmentation(
            imgs,
            rotation_range=1.5,
            scale_range=0.05,
            height_shift_range=0.025,
            width_shift_range=0.05,
            erode_range=5,
            dilate_range=3,
        )
        imgs = pp.normalization(imgs)

        gtTexts = [self.tokenizer.encode(sample.gtText) for sample in samples]
        gtTexts = [
            np.pad(tokens, (0, self.tokenizer.maxlen - len(tokens)))
            for tokens in gtTexts
        ]
        gtTexts = np.asarray(gtTexts, dtype=np.int16)

        yield (imgs, gtTexts)
def next_train_batch(self):
    """Yield CTC training batches forever, wrapping at the end of the partition.

    Each pass slices ``self.batch_size`` consecutive entries from
    ``self.dataset['train']`` starting at ``self.index['train']``; the cursor
    is reset to 0 once it has reached ``self.size['train']``.

    The last slice of a pass over the data may contain fewer than
    ``self.batch_size`` samples. The length arrays and the dummy loss target
    are therefore sized by the number of samples actually sliced (indexing
    ``range(self.batch_size)`` into a short slice raised ``IndexError``).

    Yields:
        A tuple ``(inputs, output, [])`` where ``inputs`` is a dict with keys
        ``"input"`` (augmented and normalized images), ``"labels"``
        (ground truth encoded by ``self.tokenizer`` and post-padded by
        ``pad_sequences`` to ``self.tokenizer.maxlen``), ``"input_length"``
        (``self.tokenizer.maxlen`` repeated once per sample) and
        ``"label_length"`` (``len`` of each raw ground-truth entry);
        ``output`` is ``{"CTCloss": zeros}`` with one integer zero per
        sample; the empty list is the sample-weight slot.
    """
    while True:
        # Restart from the beginning once the cursor has run off the end.
        if self.index['train'] >= self.size['train']:
            self.index['train'] = 0

        index = self.index['train']
        until = index + self.batch_size
        self.index['train'] += self.batch_size

        x_train = self.dataset['train']['dt'][index:until]
        y_train = self.dataset['train']['gt'][index:until]

        # Real number of samples in this batch; smaller than batch_size for
        # the final slice when the partition size is not a multiple of it.
        batch_len = len(y_train)

        x_train_len = np.asarray([self.tokenizer.maxlen] * batch_len)
        # NOTE(review): lengths come from the raw entries, before encoding and
        # before pad_sequences may truncate -- confirm they always match the
        # encoded label lengths.
        y_train_len = np.asarray([len(label) for label in y_train])

        x_train = pp.augmentation(
            x_train,
            rotation_range=1.5,
            scale_range=0.05,
            height_shift_range=0.025,
            width_shift_range=0.05,
            erode_range=5,
            dilate_range=3,
        )
        x_train = pp.normalization(x_train)

        y_train = [self.tokenizer.encode(y) for y in y_train]
        y_train = pad_sequences(y_train, maxlen=self.tokenizer.maxlen, padding="post")

        inputs = {
            "input": x_train,
            "labels": y_train,
            "input_length": x_train_len,
            "label_length": y_train_len,
        }
        # One integer zero per sample, keyed "CTCloss", yielded as the target.
        output = {"CTCloss": np.zeros(batch_len, dtype=int)}

        # x, y and sample_weight
        yield (inputs, output, [])