def get_none_arabic_words(text):
    """Return the words of ``text`` that the detector does not class as Arabic.

    ``text`` is split on whitespace; every word for which
    ``AlphabetDetector.is_arabic`` is falsy is kept, in its original order
    (duplicates included).
    """
    detector = AlphabetDetector()
    return [token for token in text.split() if not detector.is_arabic(token)]
def Pic_Book(x):
    """Render the title found in sheet row ``x`` onto the book cover template.

    The title is read from ``sheet.cell(x, 4)`` and the output path suffix
    from ``sheet.cell(x, 17)`` (``sheet`` is a module-level object). The title
    is reshaped, passed through ``get_display``, wrapped at 20 characters and
    drawn in black 10pt Arial on ``book.jpg``. The image is then resized to
    563x285 and saved to ``"C:" + <cell (x, 17) value>``.

    Args:
        x: First index passed to ``sheet.cell`` (presumably the row -- confirm
            against the spreadsheet library in use).

    Returns:
        None. The result of the call is the image written to disk.
    """
    im1 = Image.open(r'C:\\Users\\user\\Desktop\\darlusail\\book.jpg')
    pic_path = sheet.cell(x, 17).value
    title = str(sheet.cell(x, 4).value)

    draw = ImageDraw.Draw(im1)
    font = ImageFont.truetype('arial.ttf', size=10)
    color = 'rgb(0, 0, 0)'  # black

    txt = arabic_reshaper.reshape(title)
    message = get_display(txt)

    # Wrap the display-ordered message so each caption line is <= 20 chars.
    wrapper = textwrap3.TextWrapper(width=20)
    word_list = wrapper.wrap(text=message)

    # Exactly one ordering is applied. The previous pair of independent `if`
    # tests ran both loops whenever the detector reported the text as Arabic
    # *and* Latin at once (presumably text without letters -- confirm), which
    # wrote the caption twice. Non-Latin text keeps the reversed line order.
    ad = AlphabetDetector()
    if ad.is_latin(txt):
        print(" the name doesn't contains arabic words")
        ordered_lines = word_list
    else:
        print(" the name contains arabic words")
        ordered_lines = reversed(word_list)
    caption_new = ''.join(line + '\n' for line in ordered_lines)
    print(caption_new)

    # ImageDraw.textsize was removed in Pillow 10; use textbbox when the
    # installed Pillow provides it and fall back to textsize otherwise.
    if hasattr(draw, 'textbbox'):
        left, _top, right, _bottom = draw.textbbox(
            (0, 0), caption_new, font=font
        )
        text_width = right - left
    else:
        text_width, _text_height = draw.textsize(caption_new, font=font)

    # Centre horizontally; the text block starts at the vertical midpoint.
    # Separate names keep the row argument ``x`` from being overwritten.
    img_width, img_height = im1.size
    text_x = 0.5 * (img_width - text_width)
    text_y = 0.5 * img_height
    draw.text((text_x, text_y), caption_new, fill=color, font=font)

    # Resize (not crop) to the final dimensions. Image.LANCZOS is the same
    # filter as the Image.ANTIALIAS alias that Pillow 10 removed.
    size = 563, 285
    im1 = im1.resize(size, Image.LANCZOS)
    im1.save(r"C:" + pic_path)  # save the image with the new configuration
def keep_only_arabic(text):
    """Drop every word of ``text`` that is not alphabetic Arabic.

    ``text`` is processed line by line (``str.splitlines``). Within a line,
    a whitespace-separated word survives only when
    ``AlphabetDetector.is_arabic`` accepts it and ``str.isalpha`` is true for
    it. Surviving words are re-joined with single spaces and the lines with
    ``'\\n'``; a line with no surviving words becomes an empty line.

    NOTE(review): a second ``keep_only_arabic(words)`` taking a list of
    tokens is defined later in this source; if both live in the same module
    the later one replaces this one -- confirm which is intended.
    """
    detector = AlphabetDetector()
    filtered_lines = [
        ' '.join(
            token
            for token in raw_line.split()
            if detector.is_arabic(token) and token.isalpha()
        )
        for raw_line in text.splitlines()
    ]
    return '\n'.join(filtered_lines)
def keep_text_with_diacritics(text):
    """Keep only the Arabic words of ``text`` that carry diacritics.

    ``text`` is split on ``'\\n'``. In each line, a whitespace-separated word
    is kept when ``AlphabetDetector.is_arabic`` accepts it and
    ``has_diacritics`` is truthy for it. Kept words are joined with single
    spaces and the lines are joined back with ``'\\n'``, so the number of
    lines is unchanged.
    """
    detector = AlphabetDetector()
    kept_lines = []
    for raw_line in text.split('\n'):
        kept_words = [
            token
            for token in raw_line.split()
            if detector.is_arabic(token) and has_diacritics(token)
        ]
        kept_lines.append(' '.join(kept_words))
    return '\n'.join(kept_lines)
def clean_tweet(tweet):
    """Normalise a tweet and return only its Arabic tokens as one string.

    Steps, in order:
      1. configure ``tweet_processor`` with the URL, MENTION, HASHTAG,
         RESERVED and NUMBER options and run ``tweet_processor.clean`` on the
         lower-cased tweet;
      2. apply ``remove_diacritics`` and ``normalize_arabic``;
      3. pass the whitespace-split words to ``keep_only_arabic`` (called here
         with a list of words, so the list-taking variant is expected);
      4. ``tokenize`` the result and keep the tokens accepted by
         ``AlphabetDetector.is_arabic``.

    Args:
        tweet: The raw tweet text.

    Returns:
        The surviving tokens joined with single spaces.
    """
    ad = AlphabetDetector()
    tweet_processor.set_options(
        tweet_processor.OPT.URL,
        tweet_processor.OPT.MENTION,
        tweet_processor.OPT.HASHTAG,
        tweet_processor.OPT.RESERVED,
        tweet_processor.OPT.NUMBER,
    )
    tweet = tweet.lower()
    tweet = tweet_processor.clean(tweet)
    tweet = remove_diacritics(tweet)
    tweet = normalize_arabic(tweet)
    tweet = keep_only_arabic(tweet.split())
    tokens = tokenize(tweet)
    # The former `token if emoticon_re.search(token) else token` pass returned
    # the token unchanged on both arms, so it has been dropped.
    return ' '.join(token for token in tokens if ad.is_arabic(token))
def keep_only_arabic(words):
    """Join the Arabic entries of ``words`` into a space-separated string.

    ``words`` is any iterable of tokens; those accepted by
    ``AlphabetDetector.is_arabic`` are kept in order and joined with ``' '``.
    """
    detector = AlphabetDetector()
    return ' '.join(filter(detector.is_arabic, words))