コード例 #1
0
    def generate_strings(self):
        """Return the strings to render, taken from the configured source.

        The source is selected by ``self.source``:

        * ``'string'``: the value stored in ``self.args['strings']``.
        * ``'random'``: ``ko_create_strings_randomly`` called with the length,
          variable-length and character-class options from ``self.sargs``
          and the language from ``self.args``.
        * ``'dict'``: ``create_strings_from_dict`` over ``self.dict``.
        * ``'wiki'``: ``create_strings_from_wikipedia``.
        * ``'file'``: ``self.dict`` itself.

        Every generating source is asked for 1000 strings.

        When ``self.sargs['shuffle']`` is truthy the result is shuffled. The
        shuffle is applied to a copy, so ``self.args['strings']`` and
        ``self.dict`` are never reordered as a side effect of this call.

        Raises:
            RuntimeError: if ``self.source`` is not one of the values above.
        """
        # Number of strings requested from every generating source.
        requested = 1000

        if self.source == 'string':
            # NOTE(review): this branch reads self.args while the generation
            # options below come from self.sargs -- confirm that is intended.
            res = self.args['strings']

        elif self.source == 'random':
            res = ko_create_strings_randomly(self.sargs['length'],
                                             self.sargs['allow_variable'],
                                             requested,
                                             self.sargs['use_letters'],
                                             self.sargs['use_numbers'],
                                             self.sargs['use_symbols'],
                                             self.args['language'])

        elif self.source == 'dict':
            res = create_strings_from_dict(self.sargs['length'],
                                           self.sargs['allow_variable'],
                                           requested, self.dict)

        elif self.source == 'wiki':
            res = create_strings_from_wikipedia(self.sargs['minimum_length'],
                                                requested,
                                                self.args['language'])

        elif self.source == 'file':
            # NOTE(review): unlike the other sources this one is not capped
            # at 1000 entries (the original author left a "1000??" marker).
            res = self.dict

        else:
            raise RuntimeError("Unknown string source: {!r}".format(
                self.source))

        if self.sargs['shuffle']:
            # Copy first: for the 'string' and 'file' sources ``res`` is the
            # caller-owned list, which must not be reordered in place.
            res = list(res)
            random.shuffle(res)

        return res
コード例 #2
0
def main():
    """
        Description: Main function

        Parses the command-line arguments, creates the output directory,
        builds the list of strings (from Wikipedia, an input file, random
        sequences or the language dictionary), hands one job tuple per
        string to FakeTextDataGenerator.generate_from_tuple in a process
        pool and, when args.name_format is 2, writes labels.txt in the
        output directory.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Creating word list
    lang_dict = load_dict(args.language)

    # Create font (path) list
    if not args.font:
        fonts = load_fonts(args.language)
    elif os.path.isfile(args.font):
        fonts = [args.font]
    else:
        sys.exit("Cannot open font")

    # Creating synthetic sentences (or word)
    if args.use_wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count,
                                                args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random_sequences:
        strings = create_strings_randomly(
            args.length,
            args.random,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
                args.include_letters,
                args.include_numbers,
                args.include_symbols,
        ):
            args.name_format = 2
    else:
        strings = create_strings_from_dict(args.length, args.random,
                                           args.count, lang_dict)

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    elif args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # The context manager terminates the pool even when a worker raises,
    # so no worker processes are left behind on failure.
    with Pool(args.thread_count) as pool:
        # One tuple per string; the field order is the positional contract
        # of FakeTextDataGenerator.generate_from_tuple.
        jobs = [(
            index,
            text,
            fonts[rnd.randrange(0, len(fonts))],
            args.output_dir,
            args.format,
            args.extension,
            args.skew_angle,
            args.random_skew,
            args.blur,
            args.random_blur,
            args.background,
            args.distorsion,
            args.distorsion_orientation,
            args.handwritten,
            args.name_format,
            args.width,
            args.alignment,
            args.text_color,
            args.orientation,
            args.space_width,
            args.character_spacing,
            args.margins,
            args.fit,
        ) for index, text in enumerate(strings)]

        # Drain the iterator; the progress total is the real job count,
        # which may differ from args.count.
        for _ in tqdm(
                pool.imap_unordered(
                    FakeTextDataGenerator.generate_from_tuple, jobs),
                total=string_count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"),
                  "w",
                  encoding="utf8") as f:
            for i, text in enumerate(strings):
                file_name = str(i) + "." + args.extension
                f.write("{} {}\n".format(file_name, text))
コード例 #3
0
    def test_create_strings_from_dict(self):
        """create_strings_from_dict(3, False, 2, words) returns 2 strings,
        the first of which is made of 3 space-separated words."""
        strings = create_strings_from_dict(3, False, 2,
                                           ["TEST", "TEST", "TEST", "TEST"])

        # Separate assertions so a failure reports which expectation broke
        # and shows the actual value.
        self.assertEqual(len(strings), 2)
        self.assertEqual(len(strings[0].split(" ")), 3)
コード例 #4
0
def main():
    """
        Description: Main function

        Parses the command-line arguments, creates the output directory,
        builds strings from the language dictionary, hands one job tuple
        per string to FakeTextDataGenerator.generate_from_tuple in a
        process pool, writes labels.txt when args.name_format is 2 and
        finally calls make_mask on the output directory.

        Generation is best-effort: a failure while building or running the
        jobs is reported on stdout and the label/mask steps still run.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Creating word list

    print("dict2")
    lang_dict = load_dict(args.language)

    # Create font (path) list

    print("load ko fonts")
    fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = create_strings_from_dict(args.length, args.random, args.count,
                                       lang_dict)

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    elif args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)
    try:
        # Skip macOS ".DS_Store" entries wherever the font directory lives,
        # not only under one hard-coded relative path.
        fonts = [x for x in fonts if os.path.basename(x) != '.DS_Store']

        # The context manager terminates the pool on failure as well.
        with Pool(args.thread_count) as pool:
            # One tuple per string; the field order is the positional
            # contract of FakeTextDataGenerator.generate_from_tuple.
            jobs = [(
                index,
                text,
                fonts[rnd.randrange(0, len(fonts))],
                args.output_dir,
                args.format,
                args.extension,
                args.skew_angle,
                args.random_skew,
                args.blur,
                args.random_blur,
                args.background,
                args.distorsion,
                args.distorsion_orientation,
                args.name_format,
                args.width,
                args.alignment,
                args.text_color,
                args.orientation,
                args.space_width,
                args.character_spacing,
                args.margins,
                args.fit,
                args.output_mask,
                args.word_split,
                args.image_dir,
                args.stroke_width,
                args.stroke_fill,
                args.image_mode,
            ) for index, text in enumerate(strings)]

            for _ in tqdm(
                    pool.imap_unordered(
                        FakeTextDataGenerator.generate_from_tuple, jobs),
                    total=string_count,
            ):
                pass
    except Exception as exc:
        # Keep the original best-effort behaviour, but no longer swallow
        # KeyboardInterrupt/SystemExit and do show what actually failed.
        print("parameter error!")
        print(repr(exc))

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"),
                  "w",
                  encoding="utf8") as f:
            for i, text in enumerate(strings):
                file_name = str(i) + "." + args.extension
                if args.space_width == 0:
                    file_name = file_name.replace(" ", "")
                f.write("{} {}\n".format(file_name, text))

    make_mask(args.output_dir, args.save_dir, args.mask_width,
              args.mask_height, args.sn)
コード例 #5
0
def main():
    """
        Description: Main function

        Parses the command-line arguments, creates the output directory,
        loads the word list (args.dict file or the language dictionary) and
        the font list (args.font_dir, args.font or the language fonts),
        builds the list of strings, hands one job tuple per string to
        FakeTextDataGenerator.generate_from_tuple in a process pool and,
        when args.name_format is 2, writes labels.txt in the output
        directory.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Creating word list
    if args.dict:
        if not os.path.isfile(args.dict):
            sys.exit("Cannot open dict")
        with open(args.dict, "r", encoding="utf8", errors="ignore") as d:
            # Blank lines are not words.
            lang_dict = [l for l in d.read().splitlines() if len(l) > 0]
    else:
        lang_dict = load_dict(args.language)

    # Create font (path) list
    if args.font_dir:
        fonts = [
            os.path.join(args.font_dir, p) for p in os.listdir(args.font_dir)
            if os.path.splitext(p)[1] == ".ttf"
        ]
    elif args.font:
        if os.path.isfile(args.font):
            fonts = [args.font]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    if args.use_wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count,
                                                args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random_sequences:
        strings = create_strings_randomly(
            args.length,
            args.random,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
                args.include_letters,
                args.include_numbers,
                args.include_symbols,
        ):
            args.name_format = 2
    else:
        strings = create_strings_from_dict(args.length, args.random,
                                           args.count, lang_dict)

    # NOTE: a reshaping step for args.language == "ar" (arabic_reshaper,
    # reversing the word order of every string) used to run here and is
    # currently disabled.
    if args.case == "upper":
        strings = [x.upper() for x in strings]
    elif args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # The context manager terminates the pool even when a worker raises,
    # so no worker processes are left behind on failure.
    with Pool(args.thread_count) as pool:
        # One tuple per string; the field order is the positional contract
        # of FakeTextDataGenerator.generate_from_tuple.
        jobs = [(
            index,
            text,
            fonts[rnd.randrange(0, len(fonts))],
            args.output_dir,
            args.format,
            args.extension,
            args.skew_angle,
            args.random_skew,
            args.blur,
            args.random_blur,
            args.background,
            args.distorsion,
            args.distorsion_orientation,
            args.handwritten,
            args.name_format,
            args.width,
            args.alignment,
            args.text_color,
            args.orientation,
            args.space_width,
            args.character_spacing,
            args.margins,
            args.fit,
            args.output_mask,
            args.word_split,
            args.image_dir,
            args.stroke_width,
            args.stroke_fill,
            args.image_mode,
        ) for index, text in enumerate(strings)]

        # Drain the iterator; the progress total is the real job count,
        # which may differ from args.count.
        for _ in tqdm(
                pool.imap_unordered(
                    FakeTextDataGenerator.generate_from_tuple, jobs),
                total=string_count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"),
                  "w",
                  encoding="utf8") as f:
            for i, text in enumerate(strings):
                file_name = str(i) + "." + args.extension
                if args.space_width == 0:
                    file_name = file_name.replace(" ", "")
                # Write the file name (not the bare index) so each line
                # really maps a file to its label.
                f.write("{} {}\n".format(file_name, text))
コード例 #6
0
def main():
    """Generate ``count`` images made of two stacked text lines.

    All settings (``font_dir``, ``font``, ``dict_dir``, ``length``,
    ``random_length``, ``count``, ``font_size``, ``background_img_dir``,
    ``image_width``, ``image_height``, ``line_space``, ``out_dir``) are read
    from module-level names.

    For every image, one string is drawn from each of two candidate lists,
    rendered through ``GeneratorFromStrings``, and both renderings are
    pasted onto a background from ``generate_background_img``, the second
    one ``line_space`` pixels below the first. The composite is saved as
    ``<label1><label2>.jpg`` in ``out_dir`` next to a ``.txt`` file that
    pairs each bounding box with one element of the concatenated labels.

    Exits through ``sys.exit`` when ``dict_dir`` is not a file.
    """
    fonts = multiline_load_fonts(font_dir, font)

    if not os.path.isfile(dict_dir):
        sys.exit("Cannot open dict")
    with open(dict_dir, "r", encoding="utf8", errors="ignore") as d:
        # Blank lines are not words.
        lang_dict = [l for l in d.read().splitlines() if len(l) > 0]

    strings1 = create_strings_from_dict(length[0], random_length[0], count,
                                        lang_dict)
    strings2 = create_strings_from_dict(length[1], random_length[1], count,
                                        lang_dict)

    for _ in range(count):
        # rnd.choices without k returns a one-element list, i.e. a single
        # randomly picked string per line.
        generator1 = GeneratorFromStrings(strings=rnd.choices(strings1),
                                          count=len(strings1),
                                          size=font_size[0],
                                          bounding_box=True,
                                          fonts=fonts,
                                          background_type=1,
                                          text_color="#FFFFFF")
        img1, bbox1, lb1 = unpack_generator(generator1)

        generator2 = GeneratorFromStrings(strings=rnd.choices(strings2),
                                          size=font_size[1],
                                          count=len(strings2),
                                          bounding_box=True,
                                          fonts=fonts,
                                          background_type=1,
                                          text_color="#FFFFFF")
        img2, bbox2, lb2 = unpack_generator(generator2)

        img = generate_background_img(background_img_dir, image_width,
                                      image_height)
        x = rnd.randint(0, img.width - max(img1.width, img2.width))
        # Reserve room for the gap between the lines as well, otherwise the
        # second line can extend past the bottom edge of the background.
        y = rnd.randint(
            0, img.height - img1.height - line_space - img2.height)

        # Paste the first line; each image is passed as its own mask.
        img.paste(img1, (x, y), img1)
        bbox1 = paste_bbox(bbox1, x, y)

        # Paste the second line line_space pixels below the first.
        second_y = y + img1.height + line_space
        img.paste(img2, (x, second_y), img2)
        bbox2 = paste_bbox(bbox2, x, second_y)

        img = img.convert("RGB")

        # Build both output paths the same way so the image and its
        # annotation land in the same directory even when out_dir has no
        # trailing separator.
        label = lb1 + lb2
        img.save(os.path.join(out_dir, label + ".jpg"))

        # Explicit encoding: the labels may be non-ASCII and must not depend
        # on the platform default.
        with open(os.path.join(out_dir, label + '.txt'),
                  mode='w',
                  encoding="utf8") as f:
            for box, p in zip(bbox1 + bbox2, label):
                f.write(str(box) + p)
                f.write('\r\n')
コード例 #7
0
ファイル: run.py プロジェクト: deepkyu/OCR_datagen
def main():
    """
        Description: Main function

        Parses the command-line arguments, creates the output directory,
        resolves the font list (args.font_dir, args.font under
        resource_dir/fonts, or the language fonts), builds the list of
        strings (Wikipedia, input file, random sequences or a dictionary
        under resource_dir/dicts), hands one job tuple per string to
        FakeTextDataGenerator.generate_from_tuple in a process pool and,
        when args.name_format is 2, writes labels.txt in the output
        directory.

        Raises ValueError when args.font_dir contains no .ttf file and
        RuntimeError when no string source option is set.
    """

    # Argument parsing
    args = argument_parser().parse_args()

    # Create the directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Create font (path) list
    if args.font_dir:
        fonts = list(glob(str(Path(args.font_dir) / "*.ttf")))
        if not fonts:
            raise ValueError("No fonts found from the given directory %s" %
                             args.font_dir)
    elif args.font:
        font_path = resource_dir / 'fonts' / args.font
        if os.path.isfile(str(font_path)):
            fonts = [str(font_path)]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    if args.wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count,
                                                args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random:
        strings = ko_create_strings_randomly(
            args.length,
            args.variable_length,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
                args.include_letters,
                args.include_numbers,
                args.include_symbols,
        ):
            args.name_format = 2
    elif args.dict:
        # Creating word list
        dict_path = resource_dir / "dicts" / args.dict
        if not os.path.isfile(dict_path):
            sys.exit("Cannot open dict")
        with open(dict_path, "r", encoding="utf8", errors="ignore") as d:
            # Blank lines are not words.
            lang_dict = [l for l in d.read().splitlines() if len(l) > 0]

        strings = create_strings_from_dict(args.length, args.variable_length,
                                           args.count, lang_dict)
    else:
        raise RuntimeError("Source option broke... somehow")

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    elif args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # The context manager terminates the pool even when a worker raises,
    # so no worker processes are left behind on failure.
    with Pool(args.thread_count) as pool:
        # One tuple per string; the field order is the positional contract
        # of FakeTextDataGenerator.generate_from_tuple.
        jobs = [(
            index,
            text,
            fonts[rnd.randrange(0, len(fonts))],
            args.output_dir,
            args.format,
            args.extension,
            args.skew_angle,
            args.random_skew,
            args.blur,
            args.random_blur,
            args.background,
            args.distorsion,
            args.distorsion_orientation,
            args.handwritten,
            args.name_format,
            args.width,
            args.alignment,
            args.text_color,
            args.orientation,
            args.space_width,
            args.character_spacing,
            args.margins,
            args.fit,
            args.output_mask,
            args.word_split,
            args.image_dir,
        ) for index, text in enumerate(strings)]

        # Drain the iterator; the progress total is the real job count,
        # which may differ from args.count.
        for _ in tqdm(
                pool.imap_unordered(
                    FakeTextDataGenerator.generate_from_tuple, jobs),
                total=string_count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"),
                  "w",
                  encoding="utf8") as f:
            for i, text in enumerate(strings):
                file_name = str(i) + "." + args.extension
                f.write("{} {}\n".format(file_name, text))
コード例 #8
0
ファイル: run.py プロジェクト: sojjeong/KoTDG
def main():
    """
        Description: Main function

        Parses the command-line arguments, creates the output directory,
        resolves the font list (args.font_dir, args.font under
        resource_dir/fonts, or the language fonts), builds the list of
        strings (Wikipedia, input file, random sequences or a dictionary
        under resource_dir/dicts), hands one job tuple per string to
        FakeTextDataGenerator.generate_from_tuple in a process pool and,
        when args.name_format is 2, writes labels.txt in the output
        directory.

        Jobs are numbered from args.start. With args.rand_color every job
        gets a random text color, with args.rand_back a random background
        value; otherwise args.text_color / args.background are used.

        Raises ValueError when args.font_dir contains no .ttf file and
        RuntimeError when no string source option is set.
    """

    # Argument parsing
    args = argument_parser().parse_args()

    # Create the directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Create font (path) list
    if args.font_dir:
        fonts = list(glob(str(Path(args.font_dir) / "*.ttf")))
        if not fonts:
            raise ValueError("No fonts found from the given directory %s" %
                             args.font_dir)
    elif args.font:
        font_path = resource_dir / 'fonts' / args.font
        if os.path.isfile(str(font_path)):
            fonts = [str(font_path)]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    if args.wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count,
                                                args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random:
        strings = ko_create_strings_randomly(
            args.length,
            args.variable_length,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
                args.include_letters,
                args.include_numbers,
                args.include_symbols,
        ):
            args.name_format = 2
    elif args.dict:
        # Creating word list
        dict_path = resource_dir / "dicts" / args.dict
        if not os.path.isfile(dict_path):
            sys.exit("Cannot open dict")
        with open(dict_path, "r", encoding="utf8", errors="ignore") as d:
            # Blank lines are not words.
            lang_dict = [l for l in d.read().splitlines() if len(l) > 0]

        strings = create_strings_from_dict(args.length, args.variable_length,
                                           args.count, lang_dict)
    else:
        raise RuntimeError("Source option broke... somehow")

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    elif args.case == "lower":
        strings = [x.lower() for x in strings]

    if args.rand_color:

        def get_color():
            if rnd.random() < 0.5:
                # Half of the time: every channel in 0..31.
                r, g, b = rnd.getrandbits(5), rnd.getrandbits(
                    5), rnd.getrandbits(5)
            else:
                # Otherwise: any 24-bit color, inverted when its channel
                # sum exceeds 384 so the brighter half is folded back.
                val = rnd.getrandbits(24)
                r, g, b = val // (2**16), (val //
                                           (2**8)) % (2**8), val % (2**8)
                if r + g + b > 128 * 3:
                    r, g, b = 255 - r, 255 - g, 255 - b
            return "#%02X%02X%02X" % (r, g, b)
    else:

        def get_color():
            return args.text_color

    if args.rand_back:

        def get_back():
            # Draws 0..9 and clamps to 3, so 3 is returned for 7 of the
            # 10 possible draws.
            val = rnd.randrange(0, 10)
            return min(3, val)
    else:

        def get_back():
            return args.background

    string_count = len(strings)

    with Pool(args.thread_count) as pool:

        def gen_tuple():
            # One tuple per string; the field order is the positional
            # contract of FakeTextDataGenerator.generate_from_tuple.
            for idx, text in enumerate(strings):
                yield (idx + args.start, text, fonts[rnd.randrange(
                    0,
                    len(fonts))], args.output_dir, args.format, args.extension,
                       args.skew_angle, args.random_skew, args.blur,
                       args.random_blur, get_back(), args.distorsion,
                       args.distorsion_orientation, args.handwritten,
                       args.name_format, args.width, args.alignment,
                       get_color(), args.orientation, args.space_width,
                       args.character_spacing, args.margins, args.fit,
                       args.output_mask, args.word_split, args.image_dir)

        # Drain the iterator without keeping the per-job results; the
        # progress total is the real job count, which may differ from
        # args.count.
        for _ in tqdm(pool.imap_unordered(
                FakeTextDataGenerator.generate_from_tuple, gen_tuple()),
                      total=string_count):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"),
                  "w",
                  encoding="utf8") as f:
            for i, text in enumerate(strings):
                # Jobs are numbered from args.start, so the label entries
                # must use the same offset.
                file_name = str(i + args.start) + "." + args.extension
                f.write("{} {}\n".format(file_name, text))