def text_to_bidi(text):
    """Normalize, reshape and bidi-reorder ``text``.

    The input is first passed through ``normalize_spaces``, then reshaped by
    an ``ArabicReshaper`` whose configuration enables
    ``use_unshaped_instead_of_isolated``, and finally handed to
    ``get_display``. The result of ``get_display`` is returned.
    """
    cleaned = normalize_spaces(text)
    reshaper_options = {'use_unshaped_instead_of_isolated': True}
    shaper = ArabicReshaper(configuration=reshaper_options)
    return get_display(shaper.reshape(cleaned))
def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order, o: dict):
    """Draw the text area described by ``o`` onto ``canvas``.

    The text comes from ``self._get_text_content(op, order, o)``; it is
    HTML-escaped, newlines become ``<br/>`` tags, and every line is reshaped
    and bidi-reordered before being laid out as a ``Paragraph``.

    Keys read from ``o``:
        ``fontfamily``, ``bold``, ``italic``, ``fontsize``, ``color``
        (three components, each divided by 255), ``align`` (``'left'``,
        ``'center'`` or ``'right'``), ``width``, ``left``, ``bottom``
        (each multiplied by ``mm``), and optionally ``downward`` and
        ``rotation``.

    Raises:
        KeyError: if a required key is missing from ``o`` or ``align`` is
            not one of the supported values.
    """
    font = o['fontfamily']
    if o['bold']:
        font += ' B'
    if o['italic']:
        font += ' I'

    font_size = float(o['fontsize'])
    align_map = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
    style = ParagraphStyle(
        name=uuid.uuid4().hex,
        fontName=font,
        fontSize=font_size,
        leading=font_size,
        autoLeading="max",
        textColor=Color(o['color'][0] / 255, o['color'][1] / 255, o['color'][2] / 255),
        alignment=align_map[o['align']],
    )
    text = conditional_escape(
        self._get_text_content(op, order, o) or "",
    ).replace("\n", "<br/>\n")

    # reportlab does not support RTL, ligature-heavy scripts like Arabic. Therefore, we use ArabicReshaper
    # to resolve all ligatures and python-bidi to switch RTL texts.
    configuration = {
        'delete_harakat': True,
        'support_ligatures': False,
    }
    reshaper = ArabicReshaper(configuration=configuration)
    try:
        text = "<br/>".join(
            get_display(reshaper.reshape(line)) for line in text.split("<br/>")
        )
    except Exception:
        # Best effort: on failure the escaped but un-reshaped text is drawn.
        # Only ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        logger.exception('Reshaping/Bidi fixes failed on string %r', text)

    p = Paragraph(text, style=style)
    _, h = p.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
    ad = getAscentDescent(font, font_size)

    canvas.saveState()
    # The ascent/descent offsets here are not really proven to be correct, they're just empirical values to get
    # reportlab render similarly to browser canvas.
    if o.get('downward', False):
        canvas.translate(float(o['left']) * mm, float(o['bottom']) * mm)
        canvas.rotate(o.get('rotation', 0) * -1)
        p.drawOn(canvas, 0, -h - ad[1] / 2)
    else:
        canvas.translate(float(o['left']) * mm, float(o['bottom']) * mm + h)
        canvas.rotate(o.get('rotation', 0) * -1)
        p.drawOn(canvas, 0, -h - ad[1])
    canvas.restoreState()
def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order, o: dict):
    """Lay out and draw the text area described by ``o`` on ``canvas``.

    The content returned by ``self._get_text_content(op, order, o)`` is
    cleaned with ``bleach`` so that only ``<br>`` tags survive, every such
    tag is rewritten to ``<br/>``, and each resulting line is reshaped and
    bidi-reordered before being rendered as a ``Paragraph``.

    ``o`` supplies ``fontfamily``, ``bold``, ``italic``, ``fontsize``,
    ``color``, ``align``, ``width``, ``left`` and ``bottom``.
    """
    font_name = o['fontfamily']
    if o['bold']:
        font_name += ' B'
    if o['italic']:
        font_name += ' I'

    size = float(o['fontsize'])
    text_color = Color(o['color'][0] / 255, o['color'][1] / 255, o['color'][2] / 255)
    alignments = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
    style = ParagraphStyle(
        name=uuid.uuid4().hex,
        fontName=font_name,
        fontSize=size,
        leading=size,
        autoLeading="max",
        textColor=text_color,
        alignment=alignments[o['align']],
    )

    raw = self._get_text_content(op, order, o) or ""
    sanitized = bleach.clean(raw, tags=["br"], attributes={}, styles=[], strip=True)
    text = re.sub("<br[^>]*>", "<br/>", sanitized)

    # reportlab cannot handle RTL, ligature-heavy scripts such as Arabic, so
    # ligatures are resolved with ArabicReshaper and the direction is fixed
    # with python-bidi, one line at a time.
    reshaper = ArabicReshaper(configuration={
        'delete_harakat': True,
        'support_ligatures': False,
    })
    shaped_lines = [
        get_display(reshaper.reshape(line)) for line in text.split("<br/>")
    ]
    text = "<br/>".join(shaped_lines)

    paragraph = Paragraph(text, style=style)
    paragraph.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
    ascent_descent = getAscentDescent(font_name, size)
    x = float(o['left']) * mm
    y = float(o['bottom']) * mm - ascent_descent[1]
    paragraph.drawOn(canvas, x, y)
def main():
    """
    Description: Main function

    Parses the command-line arguments, loads the word lists and the font
    list, builds the strings to render, renders them in a process pool via
    ``FakeTextDataGenerator.generate_from_tuple`` and, when
    ``args.name_format == 2``, writes a ``labels.txt`` file mapping each
    generated file name to its string.

    Exits through ``sys.exit`` when ``args.dict`` or ``args.font`` is given
    but does not point to an existing file.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Creating word list
    if args.dict:
        if not os.path.isfile(args.dict):
            sys.exit("Cannot open dict")
        with open(args.dict, "r", encoding="utf8", errors="ignore") as d:
            lang_dict = [line for line in d.read().splitlines() if len(line) > 0]
    else:
        lang_dict = load_dict(args.language)

    # The English word list is needed by make_my_strings regardless of
    # where lang_dict came from.
    # NOTE(review): the path is relative to the current working directory.
    with open('./trdg/dicts/en.txt', 'r', encoding='utf8', errors='ignore') as f:
        en_dict = [line for line in f.read().splitlines() if len(line) > 0]

    # Create font (path) list
    if args.font_dir:
        fonts = [
            os.path.join(args.font_dir, p)
            for p in os.listdir(args.font_dir)
            if os.path.splitext(p)[1] == ".ttf"
        ]
    elif args.font:
        if os.path.isfile(args.font):
            fonts = [args.font]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    strings = []

    if args.use_wikipedia:
        print('use_wikipedia')
        strings = create_strings_from_wikipedia(args.length, args.count, args.language)
    elif args.input_file != "":
        print('input_file')
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random_sequences:
        print('random_sequences')
        strings = create_strings_randomly(
            args.length,
            args.random,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
        ):
            args.name_format = 2
    else:
        # The create_strings_from_dict fallback is disabled in favour of
        # make_my_strings, which mixes lang_dict and en_dict.
        print('make_my_strings')
        strings = make_my_strings(
            args.count,
            lang_dict,
            en_dict,
        )

    if args.language == "ar":
        from arabic_reshaper import ArabicReshaper

        arabic_reshaper = ArabicReshaper()
        # Reverse the word order of each string and reshape word by word.
        strings = [
            " ".join([arabic_reshaper.reshape(w) for w in s.split(" ")[::-1]])
            for s in strings
        ]
    if args.case == "upper":
        strings = [x.upper() for x in strings]
    if args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # Leaving the ``with`` block terminates the pool, also when a worker or
    # the iteration raises, so no worker processes are left behind.
    with Pool(args.thread_count) as p:
        for _ in tqdm(
            p.imap_unordered(
                FakeTextDataGenerator.generate_from_tuple,
                zip(
                    range(string_count),
                    strings,
                    [
                        fonts[rnd.randrange(0, len(fonts))]
                        for _ in range(string_count)
                    ],
                    [args.output_dir] * string_count,
                    [args.format] * string_count,
                    [args.extension] * string_count,
                    [args.skew_angle] * string_count,
                    [args.random_skew] * string_count,
                    [args.blur] * string_count,
                    [args.random_blur] * string_count,
                    [args.background] * string_count,
                    [args.distorsion] * string_count,
                    [args.distorsion_orientation] * string_count,
                    [args.handwritten] * string_count,
                    [args.name_format] * string_count,
                    [args.width] * string_count,
                    [args.alignment] * string_count,
                    [args.text_color] * string_count,
                    [args.orientation] * string_count,
                    [args.space_width] * string_count,
                    [args.character_spacing] * string_count,
                    [args.margins] * string_count,
                    [args.fit] * string_count,
                    [args.output_mask] * string_count,
                    [args.word_split] * string_count,
                    [args.image_dir] * string_count,
                ),
            ),
            total=args.count,
        ):
            pass

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for i, label in enumerate(strings):
                file_name = str(i) + "." + args.extension
                f.write("{}\t{}\n".format(file_name, label))
# Script fragment: normalize a sample string, drop stop-word tokens, and
# reshape what is left with ArabicReshaper.
d = "F:\\Current Semester\\FYP\\OASRU_CLEN\\OASRU\\ResultScripts"

configuration = {
    'delete_harakat': False,
    'support_ligatures': True,
    'RIAL SIGN': True,  # Replace ر ي ا ل with ﷼
}
reshaper = ArabicReshaper(configuration=configuration)

text_to_be_reshaped = "ترجمان"
text_to_be_reshaped = normalize(text_to_be_reshaped)
text_to_be_reshaped = normalization.normalize_characters(text_to_be_reshaped)
text_to_be_reshaped = normalization.normalize_combine_characters(text_to_be_reshaped)
text_to_be_reshaped = normalization.punctuations_space(text_to_be_reshaped)

nlp = spacy.blank("ur")
doc = nlp(text_to_be_reshaped)

# Keep only tokens that are not stop words. The test is membership in the
# STOP_WORDS collection itself; testing against str(STOP_WORDS) would be a
# substring match and would also drop any token that merely appears inside
# a stop word.
text = [str(each) for each in doc if str(each) not in stop_words.STOP_WORDS]

# Every kept token is prefixed with a single space, then the whole string
# is reshaped in one pass.
reshaped_text = "".join(" " + each for each in text)
reshaped_text = reshaper.reshape(reshaped_text)

from bidi.algorithm import get_display
import arabic_reshaper
from arabic_reshaper import ArabicReshaper

# --- Reshape with the module-level helper ---------------------------------
text_to_be_reshaped = 'اللغة العربية رائعة'
reshaped_text = arabic_reshaper.reshape(text_to_be_reshaped)
print(reshaped_text)

# --- Reshape with an explicitly configured reshaper -----------------------
configuration = {
    'delete_harakat': False,
    'support_ligatures': True,
    'RIAL SIGN': True,  # Replace ر ي ا ل with ﷼
}
reshaper = ArabicReshaper(configuration=configuration)

# Individual letters; not referenced again in this snippet.
l1 = 'ل'
l2 = 'ا'
l3 = 'ر'
l4 = 'ي'

# The letters are written space-separated so they display individually;
# the spaces are stripped right before reshaping.
text_to_be_reshaped = 'ب ﺭ ﻱ ﺕ'  # had to split the string for display
reshaped_text = reshaper.reshape(
    text_to_be_reshaped.replace(' ', '')
)
print(reshaped_text)
def MyWordCloudGen(imgpath, scriptpath, os):
    """Generate a word-cloud image from a script file and return its path.

    One file from ``scriptpath`` is read, normalized, tokenized with a blank
    ``"ur"`` spaCy pipeline, stripped of stop words, reshaped and
    bidi-reordered, and rendered as a word cloud that is saved as
    ``image.png`` under ``imgpath`` and shown with ``plt.show()``.

    Args:
        imgpath: Directory the image is written to.
        scriptpath: Directory containing the script files.
        os: Object used for ``listdir``, ``stat`` and ``path.join`` while
            selecting the script file (callers presumably pass the ``os``
            module -- confirm against call sites).

    Returns:
        The path of the saved image, relative to the current working
        directory.
    """
    # Imported under an alias so the ``os`` parameter is not rebound; the
    # font lookup and the relative-path computation use the real module.
    import os as _os

    configuration = {
        'delete_harakat': False,
        'support_ligatures': True,
        'RIAL SIGN': True,  # Replace ر ي ا ل with ﷼
    }
    reshaper = ArabicReshaper(configuration=configuration)

    scripts = os.listdir(scriptpath)
    scripts.sort(key=lambda x: os.stat(os.path.join(scriptpath, x)).st_mtime)
    print(scripts)

    # NOTE(review): after the ascending mtime sort, index 1 is the second
    # oldest entry and raises IndexError when fewer than two entries exist
    # -- confirm this is the intended file.
    with open(path.join(scriptpath, scripts[1]), encoding="UTF-8") as script_file:
        text_to_be_reshaped = script_file.read()
    print(text_to_be_reshaped)

    text_to_be_reshaped = normalize(text_to_be_reshaped)
    text_to_be_reshaped = normalization.normalize_characters(
        text_to_be_reshaped)
    text_to_be_reshaped = normalization.normalize_combine_characters(
        text_to_be_reshaped)
    text_to_be_reshaped = normalization.punctuations_space(text_to_be_reshaped)

    nlp = spacy.blank("ur")
    doc = nlp(text_to_be_reshaped)

    # Keep only tokens that are not stop words. The test is membership in
    # the STOP_WORDS collection itself; testing against str(STOP_WORDS)
    # would be a substring match and would also drop any token that merely
    # appears inside a stop word.
    text = [str(each) for each in doc if str(each) not in stop_words.STOP_WORDS]

    # Every kept token is prefixed with a single space before reshaping.
    reshaped_text = "".join(" " + each for each in text)
    reshaped_text = reshaper.reshape(reshaped_text)

    from bidi.algorithm import get_display
    bidi_text = get_display(reshaped_text)

    # NOTE(review): the font and image paths are assembled with literal
    # backslashes, i.e. Windows-style separators.
    plt.figure(figsize=(20, 15), dpi=200)
    wordcloud = WordCloud(_os.getcwd() + "\\Urdu_fonts\\" + "DecoType Thuluth.ttf",
                          width=2000,
                          height=1500,
                          include_numbers=True,
                          stopwords=stop_words.STOP_WORDS,
                          min_font_size=30,
                          background_color="black",
                          margin=0,
                          max_words=200).generate(bidi_text)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.savefig(imgpath + "\\image.png", format="png")
    plt.show()

    img = imgpath + "\\" + "image.png"
    print(img)
    print("Relative Path", _os.path.relpath(img))
    img = _os.path.relpath(img)
    return img