def generate_chinese_images_to_check(obj_size=CHAR_IMG_SIZE, augmentation=False):
    """Render one image per character for every font file, for inspection.

    Every file in ``FONT_FILE_DIR`` whose name ends in ``.otf``, ``.ttf``,
    ``.ttc`` or ``.fon`` (case-insensitive) is processed. For each font, a
    directory named after the font is recreated under ``CHAR_IMGS_DIR`` via
    ``remove_then_makedirs`` and one ``<char>.jpg`` is written per character
    yielded by ``generate_all_chinese_images_bigger``.

    Args:
        obj_size: Size forwarded to ``get_standard_image`` /
            ``get_augmented_image``. Glyphs are first rendered at
            ``int(obj_size * 1.2)``.
        augmentation: If false, images are produced by
            ``get_standard_image``; otherwise by ``get_augmented_image``.

    Returns:
        None. The results are the files written to disk; progress is printed
        every 200 saved images.
    """
    print("Get font_file_list ...")
    font_suffixes = (".otf", ".ttf", ".ttc", ".fon")
    font_paths = [
        os.path.join(FONT_FILE_DIR, entry)
        for entry in os.listdir(FONT_FILE_DIR)
        if entry.lower().endswith(font_suffixes)
    ]

    # Upper bound used as the progress denominator: fonts x known characters.
    expected_total = len(font_paths) * len(CHAR2ID_DICT)
    saved = 0

    # Outer loop: one iteration per font file.
    for font_path in font_paths:
        # The output directory is named by the text before the first dot.
        font_type = os.path.basename(font_path).split(".", 1)[0]
        out_dir = os.path.join(CHAR_IMGS_DIR, font_type)
        remove_then_makedirs(out_dir)

        render_size = int(obj_size * 1.2)
        glyphs = generate_all_chinese_images_bigger(font_path, image_size=render_size)

        # Inner loop: one iteration per rendered character.
        for char, big_img in glyphs:
            # Usability check on the grayscale image (black background,
            # white glyph): skip it when the pixel sum is below 10.
            if sum(big_img.getdata()) < 10:
                continue

            if augmentation:
                final_img = get_augmented_image(
                    big_img, obj_size,
                    rotation=True, dilate=False, erode=True, reverse_color=True)
            else:
                final_img = get_standard_image(big_img, obj_size, reverse_color=True)

            # Save the generated character image.
            final_img.save(os.path.join(out_dir, char + ".jpg"), format="jpeg")

            # Progress report.
            saved += 1
            if saved % 200 == 0:
                print("Progress bar: %.2f%%" % (saved * 100 / expected_total))
                sys.stdout.flush()
def convert_chinese_images_to_check(obj_size=CHAR_IMG_SIZE, augmentation=True):
    """Convert external character images into per-font JPEGs for inspection.

    Image paths come from ``get_external_image_paths(root_dir=EXTERNEL_IMAGES_DIR)``,
    grouped by font type. For each font type, a directory of that name is
    recreated under ``CHAR_IMGS_DIR`` via ``remove_then_makedirs``. Each
    source image is loaded with ``load_external_image_bigger``, processed,
    and saved as ``<name before the first dot>.jpg``.

    Args:
        obj_size: Size forwarded to ``get_standard_image`` /
            ``get_augmented_image``.
        augmentation: If false, images are produced by
            ``get_standard_image``; otherwise by ``get_augmented_image``.

    Returns:
        None. Images that raise ``OSError`` while loading are reported and
        skipped; progress is printed every 200 saved images.
    """
    print("Get total images num ...")
    candidate_dirs = [
        os.path.join(EXTERNEL_IMAGES_DIR, entry)
        for entry in os.listdir(EXTERNEL_IMAGES_DIR)
    ]
    # Progress denominator: number of entries in each immediate sub-directory.
    expected_total = sum(
        len(os.listdir(path)) for path in candidate_dirs if os.path.isdir(path)
    )

    print("Begin to convert images ...")
    converted = 0

    for font_type, image_paths in get_external_image_paths(root_dir=EXTERNEL_IMAGES_DIR):
        # Create the directory that holds this font's images.
        out_dir = os.path.join(CHAR_IMGS_DIR, font_type)
        remove_then_makedirs(out_dir)

        for src_path in image_paths:
            # Original author's notes: the external image is loaded and made
            # square; it is loaded slightly bigger than the original so that
            # rotation does not lose information; and it must be loaded as a
            # white glyph on a black background for later processing, which
            # reverse_color can be used to adjust.
            try:
                big_img = load_external_image_bigger(
                    src_path, white_background=True, reverse_color=True)
            except OSError:
                print("The image %s result in OSError !" % src_path)
                continue

            if augmentation:
                final_img = get_augmented_image(
                    big_img, obj_size,
                    rotation=True, dilate=False, erode=True, reverse_color=True)
            else:
                final_img = get_standard_image(big_img, obj_size, reverse_color=True)

            # Save the converted image under the source file's stem.
            stem = os.path.basename(src_path).split(".", 1)[0]
            final_img.save(os.path.join(out_dir, stem + ".jpg"), format="jpeg")

            # Progress report.
            converted += 1
            if converted % 200 == 0:
                print("Progress bar: %.2f%%" % (converted * 100 / expected_total))
                sys.stdout.flush()
def generate_chinese_images(obj_size=CHAR_IMG_SIZE, num_imgs_per_font=NUM_IMAGES_PER_FONT):
    """Generate several augmented images per character for every font file.

    Every file in ``FONT_FILE_DIR`` whose name ends in ``.otf``, ``.ttf``,
    ``.ttc`` or ``.fon`` (case-insensitive) is processed. For each font, a
    directory named after the font is recreated under ``CHAR_IMGS_DIR`` via
    ``remove_then_makedirs``. Each usable character rendered by
    ``generate_all_chinese_images_bigger`` is written as
    ``<char>_<index>.jpg`` for ``index`` in ``range(num_imgs_per_font)``.

    Args:
        obj_size: Size forwarded to ``get_augmented_image``. Glyphs are first
            rendered at ``int(obj_size * 1.2)``.
        num_imgs_per_font: Number of augmented variants produced for each
            character of each font.

    Returns:
        None. Progress is printed every 200 processed characters.
    """
    print("Get font_file_list ...")
    font_suffixes = (".otf", ".ttf", ".ttc", ".fon")
    font_paths = [
        os.path.join(FONT_FILE_DIR, entry)
        for entry in os.listdir(FONT_FILE_DIR)
        if entry.lower().endswith(font_suffixes)
    ]

    print("Begin to generate images ...")
    # Upper bound used as the progress denominator: fonts x known characters.
    expected_total = len(font_paths) * len(CHAR2ID_DICT)
    processed = 0

    # Outer loop: one iteration per font file.
    for font_path in font_paths:
        # The output directory is named by the text before the first dot.
        font_type = os.path.basename(font_path).split(".", 1)[0]
        out_dir = os.path.join(CHAR_IMGS_DIR, font_type)
        remove_then_makedirs(out_dir)

        render_size = int(obj_size * 1.2)
        glyphs = generate_all_chinese_images_bigger(font_path, image_size=render_size)

        # Inner loop: one iteration per rendered character.
        for char, big_img in glyphs:
            # Usability check on the grayscale image (black background,
            # white glyph): skip it when the pixel sum is below 10.
            if sum(big_img.getdata()) < 10:
                continue

            # Build every variant first, then write them out, so nothing is
            # saved for this character unless all variants were produced.
            variants = [
                get_augmented_image(
                    big_img, obj_size,
                    rotation=True, dilate=False, erode=True, reverse_color=True)
                for _ in range(num_imgs_per_font)
            ]

            # Save the generated character images.
            for index, variant in enumerate(variants):
                file_name = char + "_" + str(index) + ".jpg"
                variant.save(os.path.join(out_dir, file_name), format="jpeg")

            # Progress report (counted per character, not per image).
            processed += 1
            if processed % 200 == 0:
                print("Progress bar: %.2f%%" % (processed * 100 / expected_total))
                sys.stdout.flush()