def text_to_bidi(text):
    """Return *text* reshaped and reordered for visual display.

    The input is first passed through ``normalize_spaces``, then reshaped by
    an ``ArabicReshaper`` configured with ``use_unshaped_instead_of_isolated``
    and finally handed to ``get_display``.
    """
    cleaned = normalize_spaces(text)
    shaper = ArabicReshaper(
        configuration={'use_unshaped_instead_of_isolated': True},
    )
    # Reshape first, then reorder: get_display operates on the reshaped text.
    return get_display(shaper.reshape(cleaned))
def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order, o: dict):
    """Draw one text area described by *o* onto *canvas*.

    Keys read from *o*:
      - ``fontfamily``, ``bold``, ``italic``: select the font name
        (``' B'`` / ``' I'`` are appended for bold / italic).
      - ``fontsize``: used for both font size and leading.
      - ``color``: indexable; the first three entries are divided by 255.
      - ``align``: one of ``'left'``, ``'center'``, ``'right'``.
      - ``width``, ``left``, ``bottom``: converted with ``float(...) * mm``.
      - ``downward`` (optional, default ``False``) and ``rotation``
        (optional, default ``0``): control placement and rotation.

    The text comes from ``self._get_text_content(op, order, o)``, is escaped
    with ``conditional_escape`` and has newlines turned into ``<br/>``.
    If reshaping/bidi processing fails, the failure is logged and the
    un-reshaped text is rendered instead.
    """
    font = o['fontfamily']
    if o['bold']:
        font += ' B'
    if o['italic']:
        font += ' I'

    align_map = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
    style = ParagraphStyle(
        name=uuid.uuid4().hex,
        fontName=font,
        fontSize=float(o['fontsize']),
        leading=float(o['fontsize']),
        autoLeading="max",
        textColor=Color(o['color'][0] / 255, o['color'][1] / 255, o['color'][2] / 255),
        alignment=align_map[o['align']],
    )
    text = conditional_escape(
        self._get_text_content(op, order, o) or "",
    ).replace("\n", "<br/>\n")

    # reportlab does not support RTL, ligature-heavy scripts like Arabic. Therefore, we use ArabicReshaper
    # to resolve all ligatures and python-bidi to switch RTL texts.
    configuration = {
        'delete_harakat': True,
        'support_ligatures': False,
    }
    reshaper = ArabicReshaper(configuration=configuration)
    try:
        text = "<br/>".join(
            get_display(reshaper.reshape(line)) for line in text.split("<br/>")
        )
    except Exception:
        # Best effort: keep the un-reshaped text. ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt / SystemExit propagate.
        logger.exception('Reshaping/Bidi fixes failed on string {}'.format(repr(text)))

    p = Paragraph(text, style=style)
    # Only the wrapped height is needed for positioning.
    _, h = p.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
    ad = getAscentDescent(font, float(o['fontsize']))

    canvas.saveState()
    # The ascent/descent offsets here are not really proven to be correct, they're just empirical values to get
    # reportlab render similarly to browser canvas.
    if o.get('downward', False):
        canvas.translate(float(o['left']) * mm, float(o['bottom']) * mm)
        canvas.rotate(o.get('rotation', 0) * -1)
        p.drawOn(canvas, 0, -h - ad[1] / 2)
    else:
        canvas.translate(float(o['left']) * mm, float(o['bottom']) * mm + h)
        canvas.rotate(o.get('rotation', 0) * -1)
        p.drawOn(canvas, 0, -h - ad[1])
    canvas.restoreState()
def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order, o: dict):
    """Render the text area described by *o* on *canvas*.

    The font name is built from ``o['fontfamily']`` plus ``' B'`` / ``' I'``
    suffixes, the content is fetched via ``self._get_text_content``, cleaned
    with ``bleach`` so that only ``<br>`` tags survive, reshaped line by line
    for RTL scripts and finally drawn at ``o['left']`` / ``o['bottom']``
    (in mm), shifted by the font's descent.
    """
    font_name = o['fontfamily']
    if o['bold']:
        font_name += ' B'
    if o['italic']:
        font_name += ' I'

    alignments = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
    style_name = uuid.uuid4().hex
    size = float(o['fontsize'])
    colour = Color(o['color'][0] / 255, o['color'][1] / 255, o['color'][2] / 255)
    paragraph_style = ParagraphStyle(
        name=style_name,
        fontName=font_name,
        fontSize=size,
        leading=size,
        autoLeading="max",
        textColor=colour,
        alignment=alignments[o['align']],
    )

    # Strip everything except <br> tags, then normalise every <br ...> variant to <br/>.
    raw_content = self._get_text_content(op, order, o) or ""
    cleaned = bleach.clean(raw_content, tags=["br"], attributes={}, styles=[], strip=True)
    markup = re.sub("<br[^>]*>", "<br/>", cleaned)

    # reportlab cannot handle RTL, ligature-heavy scripts such as Arabic, so
    # ligatures are resolved with ArabicReshaper and direction with python-bidi.
    shaper = ArabicReshaper(configuration={
        'delete_harakat': True,
        'support_ligatures': False,
    })
    shaped_lines = []
    for segment in markup.split("<br/>"):
        shaped_lines.append(get_display(shaper.reshape(segment)))
    markup = "<br/>".join(shaped_lines)

    paragraph = Paragraph(markup, style=paragraph_style)
    paragraph.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
    ascent_descent = getAscentDescent(font_name, float(o['fontsize']))
    x_pos = float(o['left']) * mm
    y_pos = float(o['bottom']) * mm - ascent_descent[1]
    paragraph.drawOn(canvas, x_pos, y_pos)
def main():
    """
    Description: Main function

    Parses the command-line arguments, builds the word lists and the font
    list, creates the strings to render (from Wikipedia, an input file,
    random sequences or ``make_my_strings``), renders them in a process pool
    through ``FakeTextDataGenerator.generate_from_tuple`` and, when
    ``args.name_format == 2``, writes a ``labels.txt`` mapping file names to
    their strings.
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Creating word list
    if args.dict:
        if not os.path.isfile(args.dict):
            sys.exit("Cannot open dict")
        with open(args.dict, "r", encoding="utf8", errors="ignore") as d:
            lang_dict = [l for l in d.read().splitlines() if len(l) > 0]
    else:
        lang_dict = load_dict(args.language)

    with open('./trdg/dicts/en.txt', 'r', encoding='utf8', errors='ignore') as f:
        en_dict = [i for i in f.read().splitlines() if len(i) > 0]

    # Create font (path) list
    if args.font_dir:
        fonts = [
            os.path.join(args.font_dir, p)
            for p in os.listdir(args.font_dir)
            if os.path.splitext(p)[1] == ".ttf"
        ]
    elif args.font:
        if os.path.isfile(args.font):
            fonts = [args.font]
        else:
            sys.exit("Cannot open font")
    else:
        fonts = load_fonts(args.language)

    # Creating synthetic sentences (or word)
    if args.use_wikipedia:
        print('use_wikipedia')
        strings = create_strings_from_wikipedia(args.length, args.count, args.language)
    elif args.input_file != "":
        print('input_file')
        strings = create_strings_from_file(args.input_file, args.count)
    elif args.random_sequences:
        print('random_sequences')
        strings = create_strings_randomly(
            args.length,
            args.random,
            args.count,
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
            args.language,
        )
        # Set a name format compatible with special characters automatically if they are used
        if args.include_symbols or True not in (
            args.include_letters,
            args.include_numbers,
            args.include_symbols,
        ):
            args.name_format = 2
    else:
        # NOTE: this fallback uses make_my_strings (language + English word
        # lists) rather than create_strings_from_dict.
        # Print the branch label like the other branches do (the function
        # object itself used to be printed here).
        print('make_my_strings')
        strings = make_my_strings(
            args.count,
            lang_dict,
            en_dict,
        )

    if args.language == "ar":
        from arabic_reshaper import ArabicReshaper

        reshaper = ArabicReshaper()
        # Reverse the word order and reshape each word individually.
        strings = [
            " ".join([reshaper.reshape(w) for w in s.split(" ")[::-1]])
            for s in strings
        ]
    if args.case == "upper":
        strings = [x.upper() for x in strings]
    elif args.case == "lower":
        strings = [x.lower() for x in strings]

    string_count = len(strings)

    # Settings that are identical for every generated image, in the positional
    # order expected after (index, string, font).
    shared_settings = (
        args.output_dir,
        args.format,
        args.extension,
        args.skew_angle,
        args.random_skew,
        args.blur,
        args.random_blur,
        args.background,
        args.distorsion,
        args.distorsion_orientation,
        args.handwritten,
        args.name_format,
        args.width,
        args.alignment,
        args.text_color,
        args.orientation,
        args.space_width,
        args.character_spacing,
        args.margins,
        args.fit,
        args.output_mask,
        args.word_split,
        args.image_dir,
    )
    jobs = [
        (index, string, fonts[rnd.randrange(0, len(fonts))]) + shared_settings
        for index, string in enumerate(strings)
    ]

    p = Pool(args.thread_count)
    try:
        for _ in tqdm(
            p.imap_unordered(FakeTextDataGenerator.generate_from_tuple, jobs),
            total=args.count,
        ):
            pass
    finally:
        # Always tear the workers down, even if generation raised.
        p.terminate()

    if args.name_format == 2:
        # Create file with filename-to-label connections
        with open(os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8") as f:
            for i, label in enumerate(strings):
                file_name = str(i) + "." + args.extension
                f.write("{}\t{}\n".format(file_name, label))
def __init__(self, win,
             text="Hello World",
             font="",
             pos=(0.0, 0.0),
             depth=0,
             rgb=None,
             color=(1.0, 1.0, 1.0),
             colorSpace='rgb',
             opacity=1.0,
             contrast=1.0,
             units="",
             ori=0.0,
             height=None,
             antialias=True,
             bold=False,
             italic=False,
             alignHoriz=None,
             alignVert=None,
             alignText='center',
             anchorHoriz='center',
             anchorVert='center',
             fontFiles=(),
             wrapWidth=None,
             flipHoriz=False,
             flipVert=False,
             languageStyle='LTR',
             name=None,
             autoLog=None,
             autoDraw=False):
    """
    **Performance OBS:** in general, TextStim is slower than many other
    visual stimuli, i.e. it takes longer to change some attributes.
    In general, it's the attributes that affect the shapes of the letters:
    ``text``, ``height``, ``font``, ``bold`` etc.
    These make the next .draw() slower because that sets the text again.
    You can make the draw() quick by re-setting the text
    (``myTextStim.text = myTextStim.text``) when you've changed the
    parameters.

    In general, other attributes which merely affect the presentation of
    unchanged shapes are as fast as usual. This includes ``pos``,
    ``opacity`` etc.

    The following attribute can only be set at initialization (see
    further down for a list of attributes which can be changed after
    initialization):

    **languageStyle**
        Apply settings to correctly display content from some languages
        that are written right-to-left. Currently there are three (case-
        insensitive) values for this parameter:

        - ``'LTR'`` is the default, for typical left-to-right, Latin-style
          languages.
        - ``'RTL'`` will correctly display text in right-to-left languages
          such as Hebrew. By applying the bidirectional algorithm, it
          allows mixing portions of left-to-right content (such as numbers
          or Latin script) within the string.
        - ``'Arabic'`` applies the bidirectional algorithm but additionally
          will _reshape_ Arabic characters so they appear in the cursive,
          linked form that depends on neighbouring characters, rather than
          in their isolated form. May also be applied in other scripts,
          such as Farsi or Urdu, that use Arabic-style alphabets.

    The deprecated ``alignVert`` / ``alignHoriz`` / ``rgb`` arguments are
    still accepted: each logs a warning and is mapped onto ``anchorVert``,
    ``alignText`` + ``anchorHoriz`` and ``color`` respectively.

    :Parameters:

    """

    # what local vars are defined (these are the init params) for use by
    # __repr__. This must stay the first statement: dir() lists every local
    # name, so no other local may be bound before it.
    self._initParams = dir()
    self._initParams.remove('self')

    # October 2018:
    # In place to remove the deprecation warning for pyglet.font.Text.
    # Temporary fix until pyglet.text.Label use is identical to pyglet.font.Text.
    warnings.filterwarnings(message='.*text.Label*', action='ignore')

    super(TextStim, self).__init__(
        win, units=units, name=name, autoLog=False)

    if win.blendMode == 'add':
        logging.warning("Pyglet text does not honor the Window setting "
                        "`blendMode='add'` so 'avg' will be used for the "
                        "text (but objects drawn after can be added)")
    self._needUpdate = True
    self._needVertexUpdate = True
    # use shaders if available by default, this is a good thing
    # (values are written straight into __dict__, bypassing the attribute
    # setters, until the stimulus is fully set up)
    self.__dict__['antialias'] = antialias
    self.__dict__['font'] = font
    self.__dict__['bold'] = bold
    self.__dict__['italic'] = italic
    # NB just a placeholder - real value set below
    self.__dict__['text'] = ''
    self.__dict__['depth'] = depth
    self.__dict__['ori'] = ori
    self.__dict__['flipHoriz'] = flipHoriz
    self.__dict__['flipVert'] = flipVert
    self.__dict__['languageStyle'] = languageStyle
    if languageStyle.lower() == 'arabic':
        arabic_config = {
            # if present, retain any diacritics
            'delete_harakat': False,
            # shift by 1 to be compatible with the bidi algorithm
            'shift_harakat_position': True,
        }
        self.__dict__['arabic_reshaper'] = ArabicReshaper(
            configuration=arabic_config)
    self._pygletTextObj = None
    self.pos = pos

    # deprecated attributes
    if alignVert:
        self.__dict__['alignVert'] = alignVert
        logging.warning("TextStim.alignVert is deprecated. Use the "
                        "anchorVert attribute instead")
        # for compatibility, the legacy vertical value drives anchorVert
        # (previously alignHoriz was assigned here by mistake)
        anchorVert = alignVert
    if alignHoriz:
        self.__dict__['alignHoriz'] = alignHoriz
        logging.warning("TextStim.alignHoriz is deprecated. Use alignText "
                        "and anchorHoriz attributes instead")
        # for compatibility, alignText was historically 'left'
        alignText, anchorHoriz = alignHoriz, alignHoriz

    # alignment and anchors
    self.alignText = alignText
    self.anchorHoriz = anchorHoriz
    self.anchorVert = anchorVert

    # generate the texture and list holders
    self._listID = GL.glGenLists(1)
    # pygame text needs a surface to render to:
    if self.win.winType not in ["pyglet", "glfw"]:
        self._texID = GL.GLuint()
        GL.glGenTextures(1, ctypes.byref(self._texID))

    # Color stuff
    self.colorSpace = colorSpace
    self.color = color
    # identity test: `rgb != None` is evaluated element-wise for array-like
    # values and then cannot be used as a condition
    if rgb is not None:
        logging.warning(
            "Use of rgb arguments to stimuli are deprecated. Please "
            "use color and colorSpace args instead")
        self.color = Color(rgb, 'rgb')

    self.__dict__['fontFiles'] = []
    self.fontFiles = list(fontFiles)  # calls attributeSetter
    self.setHeight(height, log=False)  # calls setFont() at some point
    # calls attributeSetter without log
    setAttribute(self, 'wrapWidth', wrapWidth, log=False)
    self.opacity = opacity
    self.contrast = contrast
    # self.width and self._fontHeightPix get set with text and
    # calcSizeRendered is called
    self.setText(text, log=False)
    self._needUpdate = True

    self.autoDraw = autoDraw

    # set autoLog now that params have been initialised
    wantLog = autoLog is None and self.win.autoLog
    self.__dict__['autoLog'] = autoLog or wantLog
    if self.autoLog:
        logging.exp("Created %s = %s" % (self.name, str(self)))
def replace_with_emoji_pdf(text, size):
    """
    Reportlab's Paragraph doesn't accept normal HTML <image> tag attributes
    such as 'class' and 'alt'. This is a small hack to remove those
    attributes.

    The text is first passed through ``Emoji.to_image``; every
    ``class="emojione"`` attribute is then replaced by ``height``/``width``
    attributes set to *size*, and every ``alt="<shortcode>"`` attribute
    matching ``Emoji.shortcode_regexp`` is removed.
    """
    # presumably turns emoji into image tags - confirm against the Emoji helper
    text = Emoji.to_image(text)
    # swap the CSS class for explicit dimensions
    text = text.replace('class="emojione"', 'height=%s width=%s' %(size, size))
    # drop the alt attribute entirely
    return re.sub('alt="'+Emoji.shortcode_regexp+'"', '', text)


# Module-level reshaper: harakat are kept and ligature support is turned off.
configuration = {
    'delete_harakat': False,
    'support_ligatures': False,
}
reshaper = ArabicReshaper(configuration=configuration)


class PageNumCanvas(canvas.Canvas):
    # Canvas subclass that records the state of each page in ``self.pages``
    # so that ``save`` can replay the pages once the page count is known.
    # NOTE(review): only the part of the class visible in this excerpt is
    # shown; ``save`` as seen here never calls the base-class ``save`` and
    # ``draw_page_number`` is not defined here - the class presumably
    # continues beyond this excerpt. Confirm.

    def __init__(self, *args, **kwargs):
        canvas.Canvas.__init__(self, *args, **kwargs)
        # one snapshot of the canvas __dict__ per finished page
        self.pages = []

    def showPage(self):
        # store a copy of the current canvas state, then start a new page
        self.pages.append(dict(self.__dict__))
        self._startPage()

    def save(self):
        page_count = len(self.pages)
        for page in self.pages:
            # restore the recorded state of this page before numbering it
            self.__dict__.update(page)
            self.draw_page_number(page_count)
import spacy
import matplotlib.pyplot as plt
from urduhack import normalize
from arabic_reshaper import ArabicReshaper
from bidi.algorithm import get_display
from urduhack import stop_words, normalization

# Script: normalise a sample Urdu string, tokenise it with a blank spaCy
# pipeline and keep every token that is not a stop word.

d = "F:\\Current Semester\\FYP\\OASRU_CLEN\\OASRU\\ResultScripts"

configuration = {
    'delete_harakat': False,
    'support_ligatures': True,
    'RIAL SIGN': True,  # Replace ر ي ا ل with ﷼
}
reshaper = ArabicReshaper(configuration=configuration)

text_to_be_reshaped = "ترجمان"
# urduhack normalisation steps, applied in sequence
text_to_be_reshaped = normalize(text_to_be_reshaped)
text_to_be_reshaped = normalization.normalize_characters(text_to_be_reshaped)
text_to_be_reshaped = normalization.normalize_combine_characters(text_to_be_reshaped)
text_to_be_reshaped = normalization.punctuations_space(text_to_be_reshaped)

nlp = spacy.blank("ur")
doc = nlp(text_to_be_reshaped)

# Test membership in the stop-word collection itself. The previous check
# against str(STOP_WORDS) was a substring search over the collection's repr,
# which also dropped tokens that merely occur inside some stop word (or
# inside the repr's punctuation).
text = [str(each) for each in doc if str(each) not in stop_words.STOP_WORDS]

# Start from an empty result string (the earlier, immediately overwritten
# reshape of the raw text was dead code and has been dropped).
reshaped_text = ""
# Demo script: reshape Arabic text, first with the module-level default
# reshaper and then with an explicitly configured ArabicReshaper instance.
import arabic_reshaper

text_to_be_reshaped = 'اللغة العربية رائعة'
# default configuration via the module-level helper
reshaped_text = arabic_reshaper.reshape(text_to_be_reshaped)
print(reshaped_text)

from arabic_reshaper import ArabicReshaper
# custom configuration: keep harakat, enable ligature support and the
# 'RIAL SIGN' ligature
configuration = {
    'delete_harakat': False,
    'support_ligatures': True,
    'RIAL SIGN': True,  # Replace ر ي ا ل with ﷼
}
reshaper = ArabicReshaper(configuration=configuration)
# NOTE(review): l1..l4 are assigned but not used in the visible part of the script
l1='ل'
l2='ا'
l3='ر'
l4='ي'
text_to_be_reshaped = 'ب ﺭ ﻱ ﺕ'  # had to split the string for display
# the spaces are removed again before reshaping
reshaped_text = reshaper.reshape(text_to_be_reshaped.replace(' ',''))
print(reshaped_text)
return v def get_first_scan(op: OrderPosition): scans = list(op.checkins.all()) if scans: return date_format( list(op.checkins.all())[-1].datetime.astimezone( op.order.event.timezone), "SHORT_DATETIME_FORMAT") return "" reshaper = SimpleLazyObject( lambda: ArabicReshaper(configuration={ 'delete_harakat': True, 'support_ligatures': False, })) class Renderer: def __init__(self, event, layout, background_file): self.layout = layout self.background_file = background_file self.variables = get_variables(event) self.images = get_images(event) self.event = event if self.background_file: self.bg_bytes = self.background_file.read() self.bg_pdf = PdfFileReader(BytesIO(self.bg_bytes), strict=False) else: self.bg_bytes = None
def MyWordCloudGen(imgpath, scriptpath, os):
    """Generate a word-cloud image from a script file and return its path.

    The files in *scriptpath* are sorted by modification time (ascending) and
    the entry at index 1 is read as UTF-8. Its text is normalised with the
    urduhack helpers, tokenised with a blank spaCy ``"ur"`` pipeline, stripped
    of stop words, reshaped, run through the bidi algorithm and rendered as a
    word cloud that is saved to ``<imgpath>\\image.png`` and shown.

    Args:
        imgpath: Directory the image is written to.
        scriptpath: Directory containing the script text files.
        os: Object providing ``listdir``, ``stat`` and ``path.join`` for
            picking the script file - presumably the standard ``os`` module;
            confirm against callers.

    Returns:
        The path of the written image relative to the current directory, as
        computed by ``os.path.relpath``.

    Raises:
        IndexError: If *scriptpath* contains fewer than two entries.
    """
    configuration = {
        'delete_harakat': False,
        'support_ligatures': True,
        'RIAL SIGN': True,  # Replace ر ي ا ل with ﷼
    }
    reshaper = ArabicReshaper(configuration=configuration)

    scripts = os.listdir(scriptpath)
    scripts.sort(key=lambda x: os.stat(os.path.join(scriptpath, x)).st_mtime)
    print((scripts))

    # Close the file deterministically instead of leaking the handle.
    with open(path.join(scriptpath, scripts[1]), encoding="UTF-8") as script_file:
        text_to_be_reshaped = script_file.read()
    print(text_to_be_reshaped)

    text_to_be_reshaped = normalize(text_to_be_reshaped)
    text_to_be_reshaped = normalization.normalize_characters(
        text_to_be_reshaped)
    text_to_be_reshaped = normalization.normalize_combine_characters(
        text_to_be_reshaped)
    text_to_be_reshaped = normalization.punctuations_space(text_to_be_reshaped)

    nlp = spacy.blank("ur")
    doc = nlp(text_to_be_reshaped)

    # Membership in the stop-word collection itself; the previous check
    # against str(STOP_WORDS) was a substring search over the collection's
    # repr and also dropped tokens that merely occur inside a stop word.
    text = [str(each) for each in doc if str(each) not in stop_words.STOP_WORDS]

    # Every kept token is prefixed with a single space (same layout as the
    # former incremental concatenation, built in one pass).
    reshaped_text = reshaper.reshape("".join(" " + each for each in text))

    from bidi.algorithm import get_display
    bidi_text = get_display(reshaped_text)

    # From here on the real ``os`` module is used, regardless of what was
    # passed in as the ``os`` argument.
    import os

    plt.figure(figsize=(20, 15), dpi=200)
    wordcloud = WordCloud(os.getcwd() + "\\Urdu_fonts\\" +
                          "DecoType Thuluth.ttf",
                          width=2000,
                          height=1500,
                          include_numbers=True,
                          stopwords=stop_words.STOP_WORDS,
                          min_font_size=30,
                          background_color="black",
                          margin=0,
                          max_words=200).generate(bidi_text)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    img = imgpath + "\\" + "image.png"
    plt.savefig(img, format="png")
    plt.show()
    print(img)

    relative_img = os.path.relpath(img)
    print("Relative Path", relative_img)
    return relative_img