Ejemplo n.º 1
0
def get_next_batch(batch_size=128):
    """Generate a batch of degraded text images paired with clean targets.

    Every sample renders a short random text with a randomly chosen font,
    size, mode and hint.  The render is copied before degradation: the copy
    becomes the target (grayscale, resized to ``image_height``, passed
    through ``utils.img2bwinv`` and divided by 255), while the original is
    noised, rescaled by a random rate, resized back to ``image_height``,
    inverted and divided by 255.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, labels)``, two arrays of shape
        ``[batch_size, padded_width, image_height]``.
    """
    noisy_samples = []
    clean_samples = []
    widest = 0
    # The value is never read; the draw is kept so this function consumes
    # the random stream exactly as before.
    font_min_length = random.randint(10, 20)
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(3, 5)
        font_size = random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        rendered = utils_font.get_font_image_from_url(
            text, font_name, font_size, fontmode=font_mode, fonthint=font_hint)
        # Keep an untouched copy before any degradation is applied.
        pristine = rendered.copy()

        # Degraded network input.
        noisy = utils_pil.convert_to_gray(utils_font.add_noise(rendered))
        rate = random.randint(8, 17) / font_size
        noisy = np.asarray(utils_pil.resize(noisy, rate))
        noisy = utils.resize(noisy, height=image_height)
        noisy = (255. - noisy) / 255.
        noisy_samples.append(noisy)

        # Clean target derived from the untouched copy.
        clean = np.asarray(utils_pil.convert_to_gray(pristine))
        clean = utils.resize(clean, height=image_height)
        clean = utils.img2bwinv(clean) / 255.
        clean_samples.append(clean)

        widest = max(widest, noisy.shape[1], clean.shape[1])

    # Pad the common width up to a multiple of POOL_SIZE.  A width that is
    # already a multiple still gains one full POOL_SIZE.
    widest += POOL_SIZE - widest % POOL_SIZE

    inputs = np.zeros([batch_size, widest, image_height])
    for idx, sample in enumerate(noisy_samples):
        padded = utils.img2vec(sample,
                               height=image_height,
                               width=widest,
                               flatten=False)
        inputs[idx, :] = np.transpose(padded)

    labels = np.zeros([batch_size, widest, image_height])
    for idx, sample in enumerate(clean_samples):
        padded = utils.img2vec(sample,
                               height=image_height,
                               width=widest,
                               flatten=False)
        labels[idx, :] = np.transpose(padded)
    return inputs, labels
Ejemplo n.º 2
0
def scan(file):
    """Run OCR on every region that ``utils.splitImg`` finds in an image.

    The image is converted to grayscale and split into regions.  Each region
    is cleaned, trimmed, resized to ``ocr.image_height``, packed into a
    square ``ocr.image_size`` input and decoded through ``session``.  The
    grayscale source and every packed input are written to the ``test``
    directory under ``curr_dir``.

    Args:
        file: object whose ``stream`` attribute can be opened by
            ``Image.open``.

    Returns:
        list: the result of ``ocr.list_to_chars`` for every decoded
        sequence, in region order.
    """
    gray = np.asarray(utils_pil.convert_to_gray(Image.open(file.stream)))
    utils.save(gray, os.path.join(curr_dir, "test", "p0.png"))

    ocr_texts = []
    for index, region in enumerate(utils.splitImg(gray)):
        # The cleaned inverse mask is handed to dropZeroEdges together with
        # the inverted region.
        mask = utils.clearImg(utils.img2bwinv(region))
        glyphs = utils.dropZeroEdges(mask, 255. - region)
        glyphs = utils.resize(glyphs, ocr.image_height) / 255.

        ocr_inputs = np.zeros([1, ocr.image_size, ocr.image_size])
        canvas = np.zeros([ocr.image_size, ocr.image_size])
        ocr_inputs[0, :] = utils.square_img(glyphs, canvas)

        utils.save(ocr_inputs[0] * 255,
                   os.path.join(curr_dir, "test", "ocr_%s.png" % index))

        ocr_seq_len = np.ones(1) * ocr.SEQ_LENGHT

        # Time only the decoding call.
        start = time.time()
        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len}
        decoded_list = session.run(res_decoded[0], feed)
        seconds = round(time.time() - start, 2)
        print("filished ocr %s , paid %s seconds" % (index, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(detected)
            for detected in utils.decode_sparse_tensor(decoded_list))

    return ocr_texts
Ejemplo n.º 3
0
def get_next_batch_for_srgan(batch_size=128):
    """Generate (degraded, clean) square image pairs for super-resolution.

    Each sample renders a random text at font size 36, resizes it to
    ``image_height`` and converts it to grayscale.  The inverted, normalised
    render is the target.  The input is the same render shrunk to a random
    smaller height, scaled back to ``image_height``, noised, dimmed by a
    random factor and randomly inverted.  Both are packed into
    ``image_size`` x ``image_size`` squares with ``utils.square_img``.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, targets)``, each of shape
        ``[batch_size, image_size, image_size]``.
    """
    inputs_images = []
    targets_images = []
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(3, 70)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        # The original note said "2 was removed", but 2 is still listed.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        image = utils_font.get_font_image_from_url(text, font_name, font_size,
                                                   font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)

        # Clean target: inverted and normalised copy of the render.
        targets_image = np.asarray(image.copy())
        targets_image = (255. - targets_image) / 255.
        targets_images.append(targets_image)

        # Floor-dividing by 1.5 or 2.5 produces a float, which
        # random.randint rejects on Python 3.12+, so cast the bound to int.
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // divisor))
        # Shrink, then scale back up, to lose detail.
        image = utils_pil.resize_by_height(image, _h)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)

        image = utils_font.add_noise(image)
        image = np.asarray(image)
        # Randomly dim the image, then randomly invert while normalising.
        image = image * random.uniform(0.3, 1)
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        inputs_images.append(image)

    inputs = np.zeros([batch_size, image_size, image_size])
    for i in range(batch_size):
        inputs[i, :] = utils.square_img(inputs_images[i],
                                        np.zeros([image_size, image_size]))

    targets = np.zeros([batch_size, image_size, image_size])
    for i in range(batch_size):
        targets[i, :] = utils.square_img(targets_images[i],
                                         np.zeros([image_size, image_size]))

    return inputs, targets
Ejemplo n.º 4
0
def get_next_batch_for_srgan(batch_size=128):
    """Generate (degraded, clean) variable-width pairs for super-resolution.

    Each sample renders a short random text at font size 36, resizes it to
    ``image_height`` and converts it to grayscale.  The inverted, normalised
    render is the target.  The input is the same render shrunk to a random
    smaller height, scaled back to ``image_height``, noised, dimmed by a
    random factor and randomly inverted.  All samples are padded to the
    widest image of the batch and stored transposed.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, targets)``, each of shape
        ``[batch_size, max_width, image_height]``.
    """
    inputs_images = []
    targets_images = []
    max_width_image = 0
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(4, 5)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        # The original note said "2 was removed", but 2 is still listed.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        image = utils_font.get_font_image_from_url(text, font_name, font_size,
                                                   font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)
        targets_image = image.copy()

        # Floor-dividing by 1.5 or 2.5 produces a float, which
        # random.randint rejects on Python 3.12+, so cast the bound to int.
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // divisor))
        # Shrink, then scale back up, to lose detail.
        image = utils_pil.resize_by_height(image, _h)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)

        # Clean target: inverted and normalised copy of the render.
        targets_image = np.asarray(targets_image)
        targets_images.append((255. - targets_image) / 255.)

        image = utils_font.add_noise(image)
        image = np.asarray(image)
        # Randomly dim the image, then randomly invert while normalising.
        image = image * random.uniform(0.3, 1)
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        inputs_images.append(image)

        # Track the widest input or target seen in the batch.
        max_width_image = max(max_width_image, image.shape[1],
                              targets_image.shape[1])

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i in range(batch_size):
        image_vec = utils.img2vec(targets_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        targets[i, :] = np.transpose(image_vec)

    return inputs, targets
Ejemplo n.º 5
0
def get_next_batch_for_res(batch_size=128):
    """Generate a recognition batch of random-character text images.

    Each text is 12 distinct characters sampled from ``CHARS``, duplicated,
    mixed with two spaces, shuffled and stripped.  It is rendered with a
    random font, size, mode and hint, then resized, noised, converted to
    grayscale and randomly inverted while being normalised.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, sparse_labels, seq_len, info)`` where ``inputs``
        has shape ``[batch_size, padded_width, image_height]``,
        ``sparse_labels`` comes from ``utils.sparse_tuple_from``,
        ``seq_len`` is a constant vector of length ``batch_size`` and
        ``info`` is the concatenation of one font URL line per sample.
    """
    samples = []
    codes = []
    widest = 0
    info_lines = []
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        # The value is never read; the draw is kept so the random stream is
        # consumed exactly as before.
        font_length = random.randint(25, 30)
        # Half of the samples use the wide size range, half the small one.
        if random.random() > 0.5:
            font_size = random.randint(8, 49)
        else:
            font_size = random.randint(8, 15)
        font_mode = random.choice([0, 1, 2, 4])
        # The original note said "2 was removed", but 2 is still listed.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        glyphs = random.sample(CHARS, 12)
        glyphs = glyphs + glyphs + [" ", " "]
        random.shuffle(glyphs)
        text = "".join(glyphs).strip()
        codes.append([CHARS.index(char) for char in text])

        picture = utils_font.get_font_image_from_url(
            text, font_name, font_size, font_mode, font_hint)
        picture = utils_pil.resize_by_height(picture, image_height,
                                             random.random() > 0.5)
        picture = utils_font.add_noise(picture)
        picture = utils_pil.convert_to_gray(picture)
        pixels = utils.resize(np.asarray(picture), height=image_height)
        if random.random() > 0.5:
            pixels = (255. - pixels) / 255.
        else:
            pixels = pixels / 255.
        samples.append(pixels)
        widest = max(widest, pixels.shape[1])

        info_lines.append("%s\n\r" % utils_font.get_font_url(
            text, font_name, font_size, font_mode, font_hint))

    # Pad the common width up to a multiple of POOL_SIZE.  A width that is
    # already a multiple still gains one full POOL_SIZE.
    widest += POOL_SIZE - widest % POOL_SIZE

    inputs = np.zeros([batch_size, widest, image_height])
    for idx, pixels in enumerate(samples):
        padded = utils.img2vec(pixels,
                               height=image_height,
                               width=widest,
                               flatten=False)
        inputs[idx, :] = np.transpose(padded)

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (widest * image_height) // (POOL_SIZE *
                                                                POOL_SIZE)
    info = "".join(info_lines)
    return inputs, sparse_labels, seq_len, info
Ejemplo n.º 6
0
def main():
    """Manual check: split a hard-coded local image and show each piece."""
    import utils_pil

    source = Image.open("/Users/oneleaf/Desktop/test.png")
    pixels = np.asarray(utils_pil.convert_to_gray(source))
    # Display every region returned by splitImg, one after another.
    for piece in splitImg(pixels):
        show(piece)
Ejemplo n.º 7
0
def getImage(CHARS, font_name, image_height, font_length, font_size,
             word_dict):
    """Render a random text and return it with a degraded, normalised image.

    The text is rendered, noised and converted to grayscale, then resized to
    a random height between 9 and ``image_height`` with the aspect ratio
    kept.  The result is inverted into ``[0, 1]``, trimmed with
    ``utils.dropZeroEdges``, sprinkled with sparse additive noise, rescaled
    to span ``[0, 1]`` and finally resized to ``image_height``.

    Args:
        CHARS: character set passed to ``utils_font.get_random_text``.
        font_name: font identifier passed to the renderer.
        image_height: final image height and upper bound of the random
            intermediate height.
        font_length: requested text length.
        font_size: font size passed to the renderer.
        word_dict: word list passed to ``utils_font.get_random_text``.

    Returns:
        tuple: ``(text, img)``, where ``img`` is the value returned by
        ``utils.resize``.
    """
    text = utils_font.get_random_text(CHARS, word_dict, font_length)
    img = utils_font.get_font_image_from_url(text, font_name, font_size)
    img = utils_font.add_noise(img)
    img = utils_pil.convert_to_gray(img)

    w, h = img.size
    _h = random.randint(9, image_height)
    # Never let rounding collapse the width to zero pixels.
    _w = max(1, round(w * _h / h))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS
    img = img.resize((_w, _h), resample)
    img = np.asarray(img)
    img = 1 - img / 255.
    img = utils.dropZeroEdges(img)

    # Roughly 10% of pixels receive additive noise of at most 0.5.
    noise = np.random.random(img.shape) - 0.9
    noise = np.maximum(noise, 0)
    img = img + noise * 5

    # Min-max normalise.  A constant image has zero range and is left at
    # zero instead of being divided into NaNs.
    imin, imax = img.min(), img.max()
    span = imax - imin
    if span > 0:
        img = (img - imin) / span
    else:
        img = img - imin

    img = utils.resize(img, image_height)
    return text, img
Ejemplo n.º 8
0
def get_next_batch_for_res(batch_size=128):
    """Build a recognition batch from pre-rendered samples in ``dataset``.

    Each record is parsed into ``dataset_example``.  Its image is converted
    to grayscale, shrunk to slightly less than ``image_height``, placed by
    ``utils_pil.random_space2``, noised, resized to fit ``image_height`` and
    ``MAX_IMAGE_WIDTH`` and randomly inverted while being normalised.

    Args:
        batch_size: number of records to consume.

    Returns:
        tuple: ``(inputs, sparse_labels, seq_len, info)`` where ``inputs``
        has shape ``[batch_size, image_height, width, 1]``,
        ``sparse_labels`` comes from ``utils.sparse_tuple_from``,
        ``seq_len`` is a vector filled with ``SEQ_LENGTH`` and ``info``
        holds ``[font_name, font_size, font_mode, font_hint, text_length]``
        as strings for every sample.

    Raises:
        Exception: when ``dataset`` is exhausted before the batch is full.
    """
    inputs_images = []
    codes = []
    max_width_image = 0
    info = []

    for _ in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished train one data file, stop")

        dataset_example.ParseFromString(serialized_example)
        feature = dataset_example.features.feature

        font_name = str(feature['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = feature['font_size'].int64_list.value[0]
        font_mode = feature['font_mode'].int64_list.value[0]
        # Read the hint from its own feature; it was previously copied from
        # 'font_mode' by mistake.
        font_hint = feature['font_hint'].int64_list.value[0]

        text = str(feature['label'].bytes_list.value[0], encoding="utf-8")
        size = feature['size'].int64_list.value
        image = feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)

        # Shrink by 1-5 pixels before random_space2 places the image.
        image = utils_pil.resize_by_height(
            image, image_height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, image_height)

        image = utils_font.add_noise(image)
        image = np.asarray(image)

        image = utils.resize(image, image_height, MAX_IMAGE_WIDTH)

        # Randomly invert while normalising.
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])

        info.append([font_name, str(font_size), str(font_mode),
                     str(font_hint), str(len(text))])

    # Batches narrower than MAX_IMAGE_WIDTH are padded up to it.
    if max_width_image < MAX_IMAGE_WIDTH:
        max_width_image = MAX_IMAGE_WIDTH

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.reshape(image_vec,
                                  (image_height, max_width_image, 1))

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)

    # Every sample reports the same fixed sequence length.
    seq_len = np.ones(batch_size) * SEQ_LENGTH
    return inputs, sparse_labels, seq_len, info
Ejemplo n.º 9
0
def get_next_batch_for_res(batch_size=128, has_sparse=True, has_onehot=True,
                           max_width=4096, height=32,
                           need_pad_width_to_max_width=False):
    """Build a recognition batch from pre-rendered samples in ``dataset``.

    Each record is parsed into ``dataset_example``.  Its image is converted
    to grayscale, shrunk to slightly less than ``height``, placed by
    ``utils_pil.random_space2``, noised, resized to fit ``height`` and
    ``max_width`` and randomly inverted while being normalised.

    Args:
        batch_size: number of records to consume.
        has_sparse: build the sparse label tuple when true.
        has_onehot: build one-hot labels when true.
        max_width: largest allowed image width.
        height: image height used for both resizing and packing.
        need_pad_width_to_max_width: pad every batch to ``max_width``
            instead of the widest image of the batch.

    Returns:
        tuple: ``(inputs, labels, sparse_labels, onehot_labels, info)``.
        ``inputs`` has shape ``[batch_size, height, width, 1]``.
        ``sparse_labels`` and ``onehot_labels`` are ``None`` when disabled.

    Raises:
        Exception: when ``dataset`` is exhausted, or when the padded batch
            width exceeds ``max_width``.
    """
    inputs_images = []
    codes = []
    # Largest width among the images of this batch.
    max_width_image = 0
    info = []

    for _ in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished train one data file, stop")

        dataset_example.ParseFromString(serialized_example)
        feature = dataset_example.features.feature

        font_name = str(feature['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = feature['font_size'].int64_list.value[0]
        font_mode = feature['font_mode'].int64_list.value[0]
        font_hint = feature['font_hint'].int64_list.value[0]

        text = str(feature['label'].bytes_list.value[0], encoding="utf-8")
        size = feature['size'].int64_list.value
        image = feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        # Convert to grayscale.
        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > height:
            image = utils_pil.resize_by_height(image, height)

        # Shrink by 1-5 pixels before random_space2 places the image.
        image = utils_pil.resize_by_height(image,
                                           height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, height)

        # Add noise.
        image = utils_font.add_noise(image)

        # Convert to a NumPy array (OpenCV layout).
        image = np.asarray(image)
        # Original note: scales by height by default and by width when the
        # width would exceed max_width.
        image = utils.resize(image, height, max_width)

        # Randomly invert while normalising.
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        # Record the widest image so far.
        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([
            font_name,
            str(font_size),
            str(font_mode),
            str(font_hint),
            str(len(text))
        ])

    # Round the width up to a multiple of 4.
    if max_width_image % 4 > 0:
        max_width_image = max_width_image + 4 - max_width_image % 4

    # An over-wide batch is rejected rather than rescaled.
    if max_width_image > max_width:
        raise Exception("img width must %s <= %s " %
                        (max_width_image, max_width))

    if need_pad_width_to_max_width:
        max_width_image = max_width

    # Pack with the `height` argument the images were resized to, not the
    # module-level image_height, so a non-default height stays consistent.
    inputs = np.zeros([batch_size, height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i],
                                  height=height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.reshape(image_vec, (height, max_width_image, 1))

    labels = [np.asarray(code) for code in codes]

    # NOTE(review): labels of differing lengths make the np.array(...) calls
    # below build ragged arrays, which recent NumPy releases reject unless
    # dtype=object is given -- confirm against the NumPy version in use.
    sparse_labels = None
    onehot_labels = None
    if has_sparse:
        sparse_labels = utils.sparse_tuple_from(labels)
        sparse_labels = np.array(sparse_labels)
    if has_onehot:
        onehot_labels = []
        for label in labels:
            # Index rows of the identity matrix to one-hot encode a label.
            label_one_hot = np.eye(CLASSES_NUMBER)[label]
            onehot_labels.append(label_one_hot)
        onehot_labels = np.array(onehot_labels)

    return inputs, np.array(labels), sparse_labels, onehot_labels, info
Ejemplo n.º 10
0
def get_next_batch(batch_size=128):
    """Generate degraded inputs, clean targets and labels for text lines.

    Each sample renders a random text at font size 36 and resizes it to
    ``image_height``.  A copy taken before degradation becomes the target
    (grayscale, ``utils.img2bwinv``, divided by 255).  The input is noised,
    shrunk to a random smaller height, scaled back to ``image_height``,
    inverted and normalised.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, targets, sparse_labels, seq_len)`` where
        ``inputs`` and ``targets`` have shape
        ``[batch_size, padded_width, image_height]``, ``sparse_labels``
        comes from ``utils.sparse_tuple_from`` and ``seq_len`` is a constant
        vector of length ``batch_size``.
    """
    images = []
    to_images = []
    codes = []
    max_width_image = 0
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        codes.append([CHARS.index(char) for char in text])
        image = utils_font.get_font_image_from_url(text,
                                                   font_name,
                                                   font_size,
                                                   fontmode=font_mode,
                                                   fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        # Keep an untouched copy before any degradation is applied.
        to_image = image.copy()
        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        # Floor-dividing by 1.5 or 2.5 produces a float, which
        # random.randint rejects on Python 3.12+, so cast the bound to int.
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // divisor))
        # Shrink, then scale back up, to lose detail.
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        # Clean target derived from the untouched copy.
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        max_width_image = max(max_width_image, image.shape[1],
                              to_image.shape[1])

    # Pad the common width up to a multiple of POOL_SIZE.  A width that is
    # already a multiple still gains one full POOL_SIZE.
    max_width_image = max_width_image + (POOL_SIZE -
                                         max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, sample in enumerate(images):
        image_vec = utils.img2vec(sample,
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i, sample in enumerate(to_images):
        image_vec = utils.img2vec(sample,
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        targets[i, :] = np.transpose(image_vec)

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image *
                                     image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
Ejemplo n.º 11
0
def get_next_batch_for_res(batch_size=128,
                           add_noise=True,
                           _font_name=None,
                           _font_size=None,
                           _font_mode=None,
                           _font_hint=None):
    """Generate a recognition batch packed into square images.

    Font name, size, mode and hint are drawn at random per sample unless
    the matching ``_font_*`` argument pins them.  Texts are re-drawn until
    the render, resized to ``image_height``, covers fewer pixels than an
    ``image_size`` x ``image_size`` square.

    Args:
        batch_size: number of samples to generate.
        add_noise: enable random rescaling, noise, dimming and the random
            choice of polarity.  When false, images are always inverted.
        _font_name: fixed font name, or ``None`` for a random one.
        _font_size: fixed font size, or ``None`` for a random one.
        _font_mode: fixed font mode, or ``None`` for a random one.
        _font_hint: fixed font hint, or ``None`` for a random one.

    Returns:
        tuple: ``(inputs, sparse_labels, seq_len, info)`` where ``inputs``
        has shape ``[batch_size, image_size, image_size]``,
        ``sparse_labels`` comes from ``utils.sparse_tuple_from``,
        ``seq_len`` is a constant vector of length ``batch_size`` and
        ``info`` lists ``[font_name, font_size, font_mode, font_hint]`` as
        strings for every sample.
    """
    inputs_images = []
    codes = []
    info = []
    for _ in range(batch_size):
        font_name = _font_name
        font_size = _font_size
        font_mode = _font_mode
        font_hint = _font_hint
        # Identity checks against None: a pinned value such as 0 must be
        # kept and is never confused with "not provided".
        if font_name is None:
            font_name = random.choice(AllFontNames)
        if font_size is None:
            # Half of the samples use the wide range, half the small one.
            if random.random() > 0.5:
                font_size = random.randint(9, 49)
            else:
                font_size = random.randint(9, 15)
        if font_mode is None:
            font_mode = random.choice([0, 1, 2, 4])
        if font_hint is None:
            font_hint = random.choice([0, 1, 2, 3, 4, 5])

        # Re-draw the text until its render fits the square pixel budget.
        while True:
            font_length = random.randint(5, 400)
            text = utils_font.get_random_text(CHARS, eng_world_list,
                                              font_length)
            image = utils_font.get_font_image_from_url(text, font_name,
                                                       font_size, font_mode,
                                                       font_hint)
            temp_image = utils_pil.resize_by_height(image, image_height)
            w, h = temp_image.size
            if w * h < image_size * image_size:
                break

        image = utils_pil.convert_to_gray(image)
        w, h = image.size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)

        if add_noise and random.random() > 0.5:
            _h = random.randint(9, image_height + 1)
            image = utils_pil.resize_by_height(image, _h)

        image = utils_pil.random_space2(image, image_height)

        if add_noise:
            image = utils_font.add_noise(image)

        image = np.asarray(image)
        if add_noise:
            # Randomly dim the image.
            image = image * random.uniform(0.3, 1)

        # Without add_noise the image is always inverted.
        if add_noise and random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])

        info.append(
            [font_name,
             str(font_size),
             str(font_mode),
             str(font_hint)])

    inputs = np.zeros([batch_size, image_size, image_size])
    for i in range(batch_size):
        inputs[i, :] = utils.square_img(inputs_images[i],
                                        np.zeros([image_size, image_size]))

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (image_size * image_size) // (POOL_SIZE *
                                                                  POOL_SIZE)
    return inputs, sparse_labels, seq_len, info
Ejemplo n.º 12
0
def get_next_batch_for_gan(batch_size=128):
    """Generate (input, trim-mask) square image pairs for GAN training.

    Every sample renders the same text twice: once with the randomly drawn
    hint, and once with a reference hint (0 or 4) whose trim box is used to
    crop both renders identically.  The pair is converted to grayscale,
    optionally rescaled, and placed by ``utils_pil.random_space2``.  The
    inverted reference becomes the trim image; the other render is
    occasionally noised or brightness-clipped, then randomly inverted while
    being normalised.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, trims)``, each of shape
        ``[batch_size, image_size, image_size]``.  Entries of ``trims``
        equal to 0 are replaced by -1.
    """
    input_images = []
    trim_images = []
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        # Half of the samples use the wide size range, half the small one.
        if random.random() > 0.5:
            font_size = random.randint(9, 49)
        else:
            font_size = random.randint(9, 15)
        font_mode = random.choice([0, 1, 2, 4])
        # The original note said "2 was removed", but 2 is still listed.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        # Re-draw the text until both renders agree in size and the result
        # fits the square pixel budget.
        while True:
            font_length = random.randint(3, 400)
            text = utils_font.get_words_text(CHARS, eng_world_list,
                                             font_length)
            image = utils_font.get_font_image_from_url(text,
                                                       font_name,
                                                       font_size,
                                                       font_mode,
                                                       font_hint,
                                                       trim=False)
            # Reference render: hint 0 for hints 0/1/3/5, hint 4 otherwise.
            if font_hint in (0, 1, 3, 5):
                clear_trim_image = utils_font.get_font_image_from_url(
                    text, font_name, font_size, font_mode, 0, trim=False)
            else:
                clear_trim_image = utils_font.get_font_image_from_url(
                    text, font_name, font_size, font_mode, 4, trim=False)

            # Crop both renders with the reference render's trim box.
            bbox = utils_pil.get_trim_box(clear_trim_image)
            image = image.crop(bbox)
            clear_trim_image = clear_trim_image.crop(bbox)

            temp_image = utils_pil.resize_by_height(image, image_height)
            if clear_trim_image.size[0] != image.size[
                    0] or clear_trim_image.size[1] != image.size[1]:
                print("get size not same,", image.size, clear_trim_image.size,
                      font_name, font_size, font_mode, font_hint)
                continue
            w, h = temp_image.size
            if w * h <= image_size * image_size:
                break

        image = utils_pil.convert_to_gray(image)  # source render
        clear_trim_image = utils_pil.convert_to_gray(clear_trim_image)

        w, h = image.size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)
            clear_trim_image = utils_pil.resize_by_size(
                clear_trim_image, image.size)

        # Randomly enlarge images that are shorter than image_height.
        w, h = image.size
        if random.random() > 0.5 and h < image_height:
            _h = random.randint(h + 1, image_height + 1)
            image = utils_pil.resize_by_height(image, _h,
                                               random.random() > 0.5)
            clear_trim_image = utils_pil.resize_by_size(
                clear_trim_image, image.size)
            if clear_trim_image.size[0] != image.size[
                    0] or clear_trim_image.size[1] != image.size[1]:
                print("random resize get size not same,", image.size,
                      clear_trim_image.size, font_name, font_size, font_mode,
                      font_hint)

        # Randomly shift the position; the trim image marks where the glyphs
        # actually are.
        image, clear_trim_image = utils_pil.random_space2(
            image, clear_trim_image, image_height)

        if clear_trim_image.size[0] != image.size[0] or clear_trim_image.size[
                1] != image.size[1]:
            print("random move space get size not same,", image.size,
                  clear_trim_image.size, font_name, font_size, font_mode,
                  font_hint)

        trims_image = np.asarray(clear_trim_image)
        # The original note called this binarisation; the code only inverts
        # and normalises.
        trims_image = (255. - trims_image) / 255.
        trim_images.append(trims_image)

        if random.random() > 0.9:
            image = utils_font.add_noise(image)

        # np.array makes a writable copy.  Forcing flags.writeable on an
        # np.asarray view of a PIL image raises on current NumPy.
        image = np.array(image)

        if random.random() > 0.9:
            # Replace near-white pixels with one random brightness value.
            image[image > 200] = 255 * random.uniform(0.5, 1)

        # Randomly invert while normalising.
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        input_images.append(image)

    inputs = np.zeros([batch_size, image_size, image_size])
    for i in range(batch_size):
        inputs[i, :] = utils.square_img(input_images[i],
                                        np.zeros([image_size, image_size]),
                                        image_height)

    trims = np.zeros([batch_size, image_size, image_size])
    for i in range(batch_size):
        trims[i, :] = utils.square_img(trim_images[i],
                                       np.zeros([image_size, image_size]),
                                       image_height)
    # Mark empty pixels with -1.  The original statement used `==` and so
    # only compared, leaving the array untouched.
    trims[trims == 0] = -1

    return inputs, trims