コード例 #1
0
    def __init__(self, chars_file, corpus_dir=None, length=None, random=False):
        """
        Store the corpus settings, read the charset and trigger loading.

        :param chars_file: path handed to load_chars() to obtain the charset
        :param corpus_dir: stored unchanged on the instance
        :param length: stored unchanged on the instance
        :param random: stored unchanged on the instance
        """
        # Plain configuration, kept verbatim for the rest of the class.
        self.chars_file = chars_file
        self.corpus_dir, self.length = corpus_dir, length
        self.random = random

        # Starts out empty. NOTE(review): presumably filled by load() - confirm.
        self.corpus = []

        self.charsets = load_chars(chars_file)

        self.load()
コード例 #2
0
def restore_exist_labels(label_path):
    """
    Work out the index at which image generation should start.

    If the labels file already exists in the target directory, new images
    are appended to it, so the start index is the number of entries that
    utils.load_chars() returns for that file. Otherwise generation starts
    from 0.

    :param label_path: path of the labels file to look for
    :return: index to start generating from
    """
    # Fresh run: nothing to append to.
    if not os.path.exists(label_path):
        print('生成图片 %s' % flags.save_dir)
        return 0

    # Incremental run: continue after the entries that are already there.
    existing_count = len(utils.load_chars(label_path))
    print('增量生成图片 %s. 从 %d 开始' % (flags.save_dir, existing_count))
    return existing_count
コード例 #3
0
def restore_exist_labels(label_path):
    """
    Return the index from which new text images should be numbered.

    When a labels file is already present in the target directory, images
    are appended to it: the result is the number of entries returned by
    utils.load_chars() for that file. Without a labels file the result is 0.

    :param label_path: path of the labels file to look for
    :return: index to start generating from
    """
    labels_present = os.path.exists(label_path)
    if labels_present:
        resume_at = len(utils.load_chars(label_path))
        print('Generate more text images in %s. Start index %d' % (flags.save_dir, resume_at))
        return resume_at

    print('Generate text images in %s' % flags.save_dir)
    return 0
コード例 #4
0
    def __init__(self, chars_file, corpus_dir=None, length=None):
        """
        Record the corpus settings, read the charset and trigger loading.

        :param chars_file: path handed to load_chars() to obtain the charset
        :param corpus_dir: stored unchanged on the instance
        :param length: stored unchanged on the instance
        """
        self.chars_file = chars_file
        self.corpus_dir, self.length = corpus_dir, length

        # Corpus: a list of strings, empty until something fills it.
        self.corpus = []

        self.charsets = load_chars(chars_file)

        self.load()
コード例 #5
0
    def __init__(self, chars_file, corpus_dir=None, length=None):
        """
        Record the corpus settings, read the charset and trigger loading.

        Every corpus type except RandomCorpus announces the directory it is
        about to read from before load() is called.

        :param chars_file: path handed to load_chars() to obtain the charset
        :param corpus_dir: stored unchanged on the instance; concatenated into
            the log message for non-random corpora
        :param length: stored unchanged on the instance
        """
        self.corpus_dir, self.length = corpus_dir, length
        self.charsets = load_chars(chars_file)

        # RandomCorpus instances skip the directory announcement.
        announces_dir = not isinstance(self, RandomCorpus)
        if announces_dir:
            print("Loading corpus from: " + self.corpus_dir)

        self.load()
コード例 #6
0
    def __init__(self, chars_file):
        """
        Build the character table and the encode/decode maps from a chars file.

        :param chars_file: path handed to load_chars(); the returned items are
            joined into a single string of characters
        """
        self.chars = ''.join(load_chars(chars_file))
        # One class on top of the charset is reserved for the CTC blank label.
        self.num_classes = len(self.chars) + 1

        # Both maps start empty.
        # NOTE(review): presumably populated by create_encode_decode_maps() - confirm.
        self.encode_maps = {}
        self.decode_maps = {}

        self.create_encode_decode_maps(self.chars)

        # Report the charset size separately from the extra CTC blank class.
        print('Load chars file: %s num_classes: %d + 1(CTC Blank)' % (chars_file, self.num_classes - 1))
コード例 #7
0
    def __init__(self,
                 chars_file,
                 corpus_dir=None,
                 length=None,
                 max_length=None,
                 distribute_file=None):
        """
        Record the corpus settings, read the charset and trigger loading.

        :param chars_file: path handed to load_chars() to obtain the charset
        :param corpus_dir: stored unchanged on the instance
        :param length: stored unchanged on the instance
        :param max_length: stored unchanged on the instance
        :param distribute_file: stored unchanged on the instance
        """
        # Plain configuration, kept verbatim for the rest of the class.
        self.chars_file = chars_file
        self.corpus_dir = corpus_dir
        self.length, self.max_length = length, max_length
        self.distribute_file = distribute_file

        # Starts out empty. NOTE(review): presumably filled by load() - confirm.
        self.corpus = []

        self.charsets = load_chars(chars_file)

        self.load()
コード例 #8
0
ファイル: corpus.py プロジェクト: zergmk2/text_renderer
    def __init__(self, chars_file, corpus_dir=None, length=None):
        """
        Record the corpus settings, read the charset, locate the corpus files
        and trigger loading.

        For every corpus type except RandomCorpus, all ``*.txt`` files below
        corpus_dir are collected recursively into ``self.corpus_path``. If
        none are found, "Corpus not found." is printed and the process exits
        with status -1.

        :param chars_file: path handed to load_chars() to obtain the charset
        :param corpus_dir: directory searched for ``*.txt`` corpus files; must
            be a string for non-random corpora because it is concatenated
            into the log message and the glob pattern
        :param length: stored unchanged on the instance
        :raises SystemExit: when no corpus file is found for a non-random corpus
        """
        import sys

        self.corpus_dir = corpus_dir
        self.length = length
        self.corpus = []

        self.chars_file = chars_file
        self.charsets = load_chars(chars_file)

        if not isinstance(self, RandomCorpus):
            print("Loading corpus from: " + self.corpus_dir)
            self.corpus_path = glob.glob(self.corpus_dir + '/**/*.txt',
                                         recursive=True)
            if not self.corpus_path:
                print("Corpus not found.")
                # sys.exit() rather than the bare exit(): the latter is an
                # interactive helper injected by the site module and is not
                # guaranteed to exist (e.g. python -S, frozen builds).
                sys.exit(-1)

        self.load()
コード例 #9
0
ファイル: font_utils.py プロジェクト: 18721017183/OCR_adjust
def get_unsupported_chars(fonts, chars_file):
    """
    Get, for each font, the charset characters the font does not support.

    The supported characters per font come from get_fonts_chars(), which
    loads them from / saves them to a cache file.

    :param fonts: font paths, forwarded to get_fonts_chars()
    :param chars_file: charset file, read with load_chars()
    :return: dict
        key -> font_path
        value -> list of charset characters (in charset order) that are not
                 among the font's supported chars
    """
    charset = ''.join(load_chars(chars_file))
    fonts_chars = get_fonts_chars(fonts, chars_file)

    fonts_unsupported_chars = {}
    for font_path, chars in fonts_chars.items():
        # Build the lookup set once per font so every membership test is O(1)
        # instead of rescanning the supported chars for each charset entry.
        supported = set(chars)
        fonts_unsupported_chars[font_path] = [
            c for c in charset if c not in supported
        ]
    return fonts_unsupported_chars
コード例 #10
0
ファイル: font_utils.py プロジェクト: 18721017183/OCR_adjust
def get_fonts_chars(fonts, chars_file):
    """
    loads/saves font supported chars from cache file

    Each font gets one cache file inside the ``.caches`` directory located
    one level above this module. The cache file name is the md5() of the
    font path concatenated with the charset, so a different charset or font
    path produces a different cache entry.

    :param fonts: list of font path. e.g ['./data/fonts/msyh.ttc']
    :param chars_file: arg from parse_args
    :return: dict
        key -> font_path
        value -> font supported chars
    """
    out = {}

    cache_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../', '.caches'))
    # exist_ok avoids the check-then-create race when the directory appears
    # between an existence test and the makedirs() call.
    os.makedirs(cache_dir, exist_ok=True)

    chars = ''.join(load_chars(chars_file))

    for font_path in fonts:
        # Cache key covers both the font path and the charset content.
        file_md5 = md5(''.join([font_path, chars]))
        cache_file_path = os.path.join(cache_dir, file_md5)

        if not os.path.exists(cache_file_path):
            # Cache miss: inspect the font and persist the result.
            ttf = load_font(font_path)
            _, supported_chars = check_font_chars(ttf, chars)
            print('Save font(%s) supported chars(%d) to cache' %
                  (font_path, len(supported_chars)))

            with open(cache_file_path, 'wb') as f:
                pickle.dump(supported_chars, f, pickle.HIGHEST_PROTOCOL)
        else:
            # NOTE(review): pickle.load can execute arbitrary code; this is
            # only safe while the .caches directory is written solely by this
            # function and is not shared with untrusted parties.
            with open(cache_file_path, 'rb') as f:
                supported_chars = pickle.load(f)
            print('Load font(%s) supported chars(%d) from cache' %
                  (font_path, len(supported_chars)))

        out[font_path] = supported_chars

    return out
コード例 #11
0
ファイル: check_font.py プロジェクト: guoxiaolu/text_renderer
                        type=str,
                        default='./data/chars/char_std_5990.txt')
    parser.add_argument(
        '--font_dir',
        type=str,
        default='/Users/guoxiaolu/work/code/text_renderer/data/fonts/chn')
    parser.add_argument(
        '--delete',
        action="store_true",
        default=False,
        help=
        'whether or not to delete font which not full support the chars_file')

    args, _ = parser.parse_known_args()

    charset = load_chars(args.chars_file)
    font_paths = glob.glob(args.font_dir + '/*.*')

    fonts = {}
    for p in font_paths:
        ttf = load_font(p)
        fonts[p] = ttf

    useful_fonts = []
    for k, v in fonts.items():
        try:
            unsupported_chars, _ = check_font_chars(v, charset)
            print("font: %s ,chars unsupported: %d" %
                  (k, len(unsupported_chars)))
            if len(unsupported_chars) < 500:
                print(unsupported_chars)