def generate_strings(self):
    """Return the strings for the configured ``self.source``.

    Supported sources are ``'string'`` (``self.args['strings']``),
    ``'random'``, ``'dict'``, ``'wiki'`` (each delegated to the matching
    string-creation helper with a fixed request of 1000 strings) and
    ``'file'`` (``self.dict`` as-is).

    Returns:
        The strings for the source. When ``self.sargs['shuffle']`` is
        truthy a shuffled *copy* is returned, so the lists owned by
        ``self.args`` / ``self.dict`` are never reordered in place.

    Raises:
        RuntimeError: if ``self.source`` is not one of the supported values.
    """
    # Number of strings requested from the helper-backed sources.
    sample_count = 1000

    if self.source == 'string':
        res = self.args['strings']
    elif self.source == 'random':
        res = ko_create_strings_randomly(
            self.sargs['length'],
            self.sargs['allow_variable'],
            sample_count,
            self.sargs['use_letters'],
            self.sargs['use_numbers'],
            self.sargs['use_symbols'],
            self.args['language'],
        )
    elif self.source == 'dict':
        res = create_strings_from_dict(
            self.sargs['length'],
            self.sargs['allow_variable'],
            sample_count,
            self.dict,
        )
    elif self.source == 'wiki':
        res = create_strings_from_wikipedia(
            self.sargs['minimum_length'],
            sample_count,
            self.args['language'],
        )
    elif self.source == 'file':
        # 1000??  (original open question: this source is not capped)
        res = self.dict
    else:
        raise RuntimeError("Unknown string source: {!r}".format(self.source))

    if self.sargs['shuffle']:
        # random.shuffle works in place; copy first so the shared
        # self.args['strings'] / self.dict lists keep their order.
        res = list(res)
        random.shuffle(res)
    return res
def main():
    """
    Description: Main function

    Parses the command-line arguments, builds the list of strings to render
    (from Wikipedia, an input file, random sequences or the language
    dictionary), renders every string through
    FakeTextDataGenerator.generate_from_tuple in a worker pool and, when
    args.name_format is 2, writes a labels.txt file in the output directory.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Creating word list
    lang_dict = load_dict(args.language)

    # Create font (path) list
    if args.font:
        if not os.path.isfile(args.font):
            sys.exit("Cannot open font")
        fonts = [args.font]
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = []

    if args.use_wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count, args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random_sequences:
        strings = create_strings_randomly(
            args.length,
            args.random,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
        ):
            args.name_format = 2
    else:
        strings = create_strings_from_dict(args.length, args.random, args.count, lang_dict)

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    if args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # One positional-argument tuple per string; a random font is drawn for
    # each string, in order.
    tasks = [
        (
            index,
            text,
            fonts[rnd.randrange(0, len(fonts))],
            args.output_dir,
            args.format,
            args.extension,
            args.skew_angle,
            args.random_skew,
            args.blur,
            args.random_blur,
            args.background,
            args.distorsion,
            args.distorsion_orientation,
            args.handwritten,
            args.name_format,
            args.width,
            args.alignment,
            args.text_color,
            args.orientation,
            args.space_width,
            args.character_spacing,
            args.margins,
            args.fit,
        )
        for index, text in enumerate(strings)
    ]

    # The context manager terminates the pool on exit, including when a
    # worker raises, so no worker processes are left behind.
    with Pool(args.thread_count) as pool:
        for _ in tqdm(
            pool.imap_unordered(FakeTextDataGenerator.generate_from_tuple, tasks),
            total=args.count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for index, text in enumerate(strings):
                file_name = str(index) + "." + args.extension
                f.write("{} {}\n".format(file_name, text))
def test_create_strings_from_dict(self):
    """create_strings_from_dict returns `count` strings of `length` words each."""
    word_list = ["TEST"] * 4
    generated = create_strings_from_dict(3, False, 2, word_list)

    # Two strings were requested ...
    self.assertEqual(len(generated), 2)
    # ... and the first one must consist of three space-separated words.
    self.assertEqual(len(generated[0].split(" ")), 3)
def main():
    """
    Description: Main function

    Parses the command-line arguments, creates strings from the language
    dictionary, renders them through FakeTextDataGenerator.generate_from_tuple
    in a worker pool (best effort: a failure is reported and the run
    continues), optionally writes labels.txt when args.name_format is 2, and
    finally calls make_mask on the output directory.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Creating word list
    print("dict2")
    lang_dict = load_dict(args.language)

    # Create font (path) list
    print("load ko fonts")
    fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = create_strings_from_dict(args.length, args.random, args.count, lang_dict)

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    if args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    try:
        # Drop the macOS metadata file that may be listed among the fonts.
        fonts = [x for x in fonts if x != '../trdg/fonts/ko/.DS_Store']

        # One positional-argument tuple per string; a random font is drawn
        # for each string, in order.
        tasks = [
            (
                index,
                text,
                fonts[rnd.randrange(0, len(fonts))],
                args.output_dir,
                args.format,
                args.extension,
                args.skew_angle,
                args.random_skew,
                args.blur,
                args.random_blur,
                args.background,
                args.distorsion,
                args.distorsion_orientation,
                args.name_format,
                args.width,
                args.alignment,
                args.text_color,
                args.orientation,
                args.space_width,
                args.character_spacing,
                args.margins,
                args.fit,
                args.output_mask,
                args.word_split,
                args.image_dir,
                args.stroke_width,
                args.stroke_fill,
                args.image_mode,
            )
            for index, text in enumerate(strings)
        ]

        # The context manager terminates the pool even when a task raises.
        with Pool(args.thread_count) as pool:
            for _ in tqdm(
                pool.imap_unordered(FakeTextDataGenerator.generate_from_tuple, tasks),
                total=args.count,
            ):
                pass
    except Exception:
        # Generation stays best effort, but only ordinary errors are caught
        # (Ctrl-C / SystemExit propagate) and the real cause is shown.
        import traceback

        print("parameter error!")
        traceback.print_exc()

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for index, text in enumerate(strings):
                file_name = str(index) + "." + args.extension
                if args.space_width == 0:
                    file_name = file_name.replace(" ", "")
                f.write("{} {}\n".format(file_name, text))

    make_mask(args.output_dir, args.save_dir, args.mask_width, args.mask_height, args.sn)
def main():
    """
    Description: Main function

    Parses the command-line arguments, loads the word list (custom dict file
    or language dictionary) and the fonts (font directory, single font file
    or language defaults), builds the strings to render, renders them through
    FakeTextDataGenerator.generate_from_tuple in a worker pool and, when
    args.name_format is 2, writes labels.txt in the output directory.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Creating word list
    if args.dict:
        if not os.path.isfile(args.dict):
            sys.exit("Cannot open dict")
        with open(args.dict, "r", encoding="utf8", errors="ignore") as d:
            lang_dict = [line for line in d.read().splitlines() if len(line) > 0]
    else:
        lang_dict = load_dict(args.language)

    # Create font (path) list
    if args.font_dir:
        fonts = [
            os.path.join(args.font_dir, p)
            for p in os.listdir(args.font_dir)
            if os.path.splitext(p)[1] == ".ttf"
        ]
    elif args.font:
        if os.path.isfile(args.font):
            fonts = [args.font]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = []

    if args.use_wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count, args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random_sequences:
        strings = create_strings_randomly(
            args.length,
            args.random,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
        ):
            args.name_format = 2
    else:
        strings = create_strings_from_dict(args.length, args.random, args.count, lang_dict)

    # if args.language == "ar":
    #     from arabic_reshaper import ArabicReshaper
    #
    #     arabic_reshaper = ArabicReshaper()
    #     strings = [
    #         " ".join([arabic_reshaper.reshape(w) for w in s.split(" ")[::-1]])
    #         for s in strings
    #     ]

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    if args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # One positional-argument tuple per string; a random font is drawn for
    # each string, in order.
    tasks = [
        (
            index,
            text,
            fonts[rnd.randrange(0, len(fonts))],
            args.output_dir,
            args.format,
            args.extension,
            args.skew_angle,
            args.random_skew,
            args.blur,
            args.random_blur,
            args.background,
            args.distorsion,
            args.distorsion_orientation,
            args.handwritten,
            args.name_format,
            args.width,
            args.alignment,
            args.text_color,
            args.orientation,
            args.space_width,
            args.character_spacing,
            args.margins,
            args.fit,
            args.output_mask,
            args.word_split,
            args.image_dir,
            args.stroke_width,
            args.stroke_fill,
            args.image_mode,
        )
        for index, text in enumerate(strings)
    ]

    # The context manager terminates the pool on exit, including when a
    # worker raises, so no worker processes are left behind.
    with Pool(args.thread_count) as pool:
        for _ in tqdm(
            pool.imap_unordered(FakeTextDataGenerator.generate_from_tuple, tasks),
            total=args.count,
        ):
            pass

    if args.name_format == 2:
        # Create file with index-to-label connections
        # NOTE(review): each line is keyed by the bare index, not by
        # "<index>.<extension>"; confirm that consumers expect this.
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for index, text in enumerate(strings):
                f.write("{} {}\n".format(index, text))
def main():
    """Generate `count` two-line text images with per-box label files.

    Reads its configuration from module-level names (font_dir, font,
    dict_dir, length, random_length, count, font_size, background_img_dir,
    image_width, image_height, line_space, out_dir). For every sample, two
    text lines are generated, pasted one below the other at a random position
    on a background image, and saved as "<label1><label2>.jpg" together with
    a "<label1><label2>.txt" file listing each bounding box followed by its
    label character.

    Exits via sys.exit if the dictionary file cannot be opened.
    """
    fonts = multiline_load_fonts(font_dir, font)

    if not os.path.isfile(dict_dir):
        sys.exit("Cannot open dict")
    with open(dict_dir, "r", encoding="utf8", errors="ignore") as d:
        lang_dict = [line for line in d.read().splitlines() if len(line) > 0]

    strings1 = create_strings_from_dict(length[0], random_length[0], count, lang_dict)
    strings2 = create_strings_from_dict(length[1], random_length[1], count, lang_dict)

    for _ in range(count):
        # Each generator is built around one randomly chosen string.
        generator1 = GeneratorFromStrings(
            strings=rnd.choices(strings1),
            count=len(strings1),
            size=font_size[0],
            bounding_box=True,
            fonts=fonts,
            background_type=1,
            text_color="#FFFFFF",
        )
        img1, bbox1, lb1 = unpack_generator(generator1)

        generator2 = GeneratorFromStrings(
            strings=rnd.choices(strings2),
            size=font_size[1],
            count=len(strings2),
            bounding_box=True,
            fonts=fonts,
            background_type=1,
            text_color="#FFFFFF",
        )
        img2, bbox2, lb2 = unpack_generator(generator2)

        img = generate_background_img(background_img_dir, image_width, image_height)

        x = rnd.randint(0, img.width - max(img1.width, img2.width))
        # NOTE(review): this range does not reserve room for line_space, so
        # the second line can extend past the bottom edge; confirm intent.
        y = rnd.randint(0, img.height - img1.height - img2.height)

        # Paste first line (the image doubles as its own paste mask).
        img.paste(img1, (x, y), img1)
        bbox1 = paste_bbox(bbox1, x, y)

        # Paste second line, line_space pixels below the first one.
        second_y = y + img1.height + line_space
        img.paste(img2, (x, second_y), img2)
        bbox2 = paste_bbox(bbox2, x, second_y)

        img = img.convert("RGB")

        # Build both output paths the same way so the image and its label
        # file land in out_dir even without a trailing path separator.
        sample_name = lb1 + lb2
        img.save(os.path.join(out_dir, sample_name + ".jpg"))
        # Labels come from a UTF-8 dictionary, so write them back as UTF-8.
        with open(os.path.join(out_dir, sample_name + '.txt'), mode='w', encoding="utf8") as f:
            for box, p in zip(bbox1 + bbox2, sample_name):
                f.write(str(box) + p)
                f.write('\r\n')
def main():
    """
    Description: Main function

    Parses the command-line arguments, resolves the fonts (font directory,
    bundled font or language defaults), builds the strings to render from
    exactly one source (Wikipedia, input file, random sequences or a bundled
    dictionary), renders them through
    FakeTextDataGenerator.generate_from_tuple in a worker pool and, when
    args.name_format is 2, writes labels.txt in the output directory.

    Raises:
        ValueError: if args.font_dir contains no ``*.ttf`` file.
        RuntimeError: if no string source option is selected.
    """

    # Argument parsing
    args = argument_parser().parse_args()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Create font (path) list
    if args.font_dir:
        fonts = list(glob(str(Path(args.font_dir) / "*.ttf")))
        if len(fonts) == 0:
            raise ValueError("No fonts found from the given directory %s" % args.font_dir)
    elif args.font:
        font_path = resource_dir / 'fonts' / args.font
        if os.path.isfile(str(font_path)):
            fonts = [str(font_path)]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = []

    if args.wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count, args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random:
        strings = ko_create_strings_randomly(
            args.length,
            args.variable_length,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
        ):
            args.name_format = 2
    elif args.dict:
        # Creating word list
        dict_path = resource_dir / "dicts" / args.dict
        if not os.path.isfile(dict_path):
            sys.exit("Cannot open dict")
        with open(dict_path, "r", encoding="utf8", errors="ignore") as d:
            lang_dict = [line for line in d.read().splitlines() if len(line) > 0]
        strings = create_strings_from_dict(args.length, args.variable_length, args.count, lang_dict)
    else:
        raise RuntimeError("Source option broke... somehow")

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    if args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # One positional-argument tuple per string; a random font is drawn for
    # each string, in order.
    tasks = [
        (
            index,
            text,
            fonts[rnd.randrange(0, len(fonts))],
            args.output_dir,
            args.format,
            args.extension,
            args.skew_angle,
            args.random_skew,
            args.blur,
            args.random_blur,
            args.background,
            args.distorsion,
            args.distorsion_orientation,
            args.handwritten,
            args.name_format,
            args.width,
            args.alignment,
            args.text_color,
            args.orientation,
            args.space_width,
            args.character_spacing,
            args.margins,
            args.fit,
            args.output_mask,
            args.word_split,
            args.image_dir,
        )
        for index, text in enumerate(strings)
    ]

    # The context manager terminates the pool on exit, including when a
    # worker raises, so no worker processes are left behind.
    with Pool(args.thread_count) as pool:
        for _ in tqdm(
            pool.imap_unordered(FakeTextDataGenerator.generate_from_tuple, tasks),
            total=args.count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for index, text in enumerate(strings):
                file_name = str(index) + "." + args.extension
                f.write("{} {}\n".format(file_name, text))
def main():
    """
    Description: Main function

    Parses the command-line arguments, resolves the fonts, builds the strings
    to render from exactly one source (Wikipedia, input file, random
    sequences or a bundled dictionary), renders them through
    FakeTextDataGenerator.generate_from_tuple in a worker pool — optionally
    with a random text colour (args.rand_color) and a random background type
    (args.rand_back) per sample, and with indices offset by args.start — and,
    when args.name_format is 2, writes labels.txt in the output directory.

    Raises:
        ValueError: if args.font_dir contains no ``*.ttf`` file.
        RuntimeError: if no string source option is selected.
    """

    # Argument parsing
    args = argument_parser().parse_args()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Create font (path) list
    if args.font_dir:
        fonts = list(glob(str(Path(args.font_dir) / "*.ttf")))
        if len(fonts) == 0:
            raise ValueError("No fonts found from the given directory %s" % args.font_dir)
    elif args.font:
        font_path = resource_dir / 'fonts' / args.font
        if os.path.isfile(str(font_path)):
            fonts = [str(font_path)]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = []

    if args.wikipedia:
        strings = create_strings_from_wikipedia(args.length, args.count, args.language)
    elif args.input_file != "":
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random:
        strings = ko_create_strings_randomly(
            args.length,
            args.variable_length,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
        ):
            args.name_format = 2
    elif args.dict:
        # Creating word list
        dict_path = resource_dir / "dicts" / args.dict
        if not os.path.isfile(dict_path):
            sys.exit("Cannot open dict")
        with open(dict_path, "r", encoding="utf8", errors="ignore") as d:
            lang_dict = [line for line in d.read().splitlines() if len(line) > 0]
        strings = create_strings_from_dict(args.length, args.variable_length, args.count, lang_dict)
    else:
        raise RuntimeError("Source option broke... somehow")

    if args.case == "upper":
        strings = [x.upper() for x in strings]
    if args.case == "lower":
        strings = [x.lower() for x in strings]

    if args.rand_color:

        def get_color():
            # Half of the time draw three 5-bit channels; otherwise draw a
            # 24-bit colour and invert it when its channel sum exceeds 384.
            if rnd.random() < 0.5:
                r, g, b = rnd.getrandbits(5), rnd.getrandbits(5), rnd.getrandbits(5)
            else:
                val = rnd.getrandbits(24)
                r, g, b = val // (2**16), (val // (2**8)) % (2**8), val % (2**8)
                if r + g + b > 128 * 3:
                    r, g, b = 255 - r, 255 - g, 255 - b
            return "#%02X%02X%02X" % (r, g, b)

    else:

        def get_color():
            return args.text_color

    if args.rand_back:

        def get_back():
            # Values 3..9 all collapse to 3, so background 3 is drawn with
            # probability 0.7 and 0, 1, 2 with probability 0.1 each.
            # img_cnt = len(glob(str(resource_dir / 'images/*.jpg')))
            val = rnd.randrange(0, 10)
            return min(3, val)

    else:

        def get_back():
            return args.background

    def gen_tuple():
        # Lazily yield one positional-argument tuple per string; the random
        # font, background and colour are drawn in that order per sample.
        for idx, text in enumerate(strings):
            yield (
                idx + args.start,
                text,
                fonts[rnd.randrange(0, len(fonts))],
                args.output_dir,
                args.format,
                args.extension,
                args.skew_angle,
                args.random_skew,
                args.blur,
                args.random_blur,
                get_back(),
                args.distorsion,
                args.distorsion_orientation,
                args.handwritten,
                args.name_format,
                args.width,
                args.alignment,
                get_color(),
                args.orientation,
                args.space_width,
                args.character_spacing,
                args.margins,
                args.fit,
                args.output_mask,
                args.word_split,
                args.image_dir,
            )

    with Pool(args.thread_count) as pool:
        # Drain the iterator for its side effects only; the per-task return
        # values are not needed, so they are not accumulated.
        for _ in tqdm(
            pool.imap_unordered(FakeTextDataGenerator.generate_from_tuple, gen_tuple()),
            total=args.count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for idx, text in enumerate(strings):
                # Use the same offset index that was handed to the generator
                # so labels line up with the files when args.start != 0.
                # NOTE(review): assumes name_format 2 names files
                # "<index>.<extension>" — confirm in FakeTextDataGenerator.
                file_name = str(idx + args.start) + "." + args.extension
                f.write("{} {}\n".format(file_name, text))