コード例 #1
0
def get_none_arabic_words(text):
    """Return the words of *text* that are not detected as Arabic.

    *text* is split on whitespace; every word for which
    ``AlphabetDetector.is_arabic`` is falsy is kept, in original order.
    """
    detector = AlphabetDetector()
    return [token for token in text.split() if not detector.is_arabic(token)]
コード例 #2
0
def Pic_Book(x):
    """Draw the book title from spreadsheet row *x* onto the template image.

    Reads the title (column 4) and the output path suffix (column 17) from
    the module-level ``sheet``, writes the wrapped title onto ``book.jpg``,
    resizes the result to 563x285 and saves it to ``"C:" + <column 17 value>``.

    Args:
        x: Row index passed to ``sheet.cell``.
    """
    im1 = Image.open(r'C:\\Users\\user\\Desktop\\darlusail\\book.jpg')
    Pic_cell = sheet.cell(x, 17).value
    Title_cell = str(sheet.cell(x, 4).value)

    draw = ImageDraw.Draw(im1)
    font = ImageFont.truetype('arial.ttf', size=10)
    color = 'rgb(0, 0, 0)'  # black

    # Reshape and reorder the title for display, then wrap it to 20 columns.
    txt = arabic_reshaper.reshape(Title_cell)
    message = get_display(txt)
    wrapper = textwrap3.TextWrapper(width=20)
    word_list = wrapper.wrap(text=message)

    # Pick exactly one line ordering. The two detector checks are not
    # mutually exclusive, so testing them in two independent `if`s could
    # append the caption twice (once reversed, once forward).
    ad = AlphabetDetector()
    if ad.is_latin(txt):
        print(" the name doesn't contains arabic words")
        ordered_lines = word_list
    else:
        print(" the name contains arabic words")
        # Presumably the wrapped lines of right-to-left text come out
        # bottom-to-top after get_display, hence the reversal - TODO confirm.
        ordered_lines = reversed(word_list)
    caption_new = ''.join(line + '\n' for line in ordered_lines)
    print(caption_new)

    # NOTE(review): ImageDraw.textsize and Image.ANTIALIAS were removed in
    # Pillow 10 - confirm the Pillow version this script is pinned to.
    w, _ = draw.textsize(caption_new, font=font)
    W, H = im1.size
    # Horizontally centred, starting at mid-height of the template.
    position = (0.5 * (W - w), 0.5 * H)
    draw.text(position, caption_new, fill=color, font=font)

    size = 563, 285  # final output dimensions
    im1 = im1.resize(size, Image.ANTIALIAS)
    im1.save(r"C:" + Pic_cell)
コード例 #3
0
def keep_only_arabic(text):
    """Strip every non-Arabic or non-alphabetic word from *text*.

    The text is processed line by line (``str.splitlines``). Within each
    line only words that pass both ``AlphabetDetector.is_arabic`` and
    ``str.isalpha`` survive; survivors are re-joined with single spaces and
    the lines are re-joined with ``'\\n'``.
    """
    detector = AlphabetDetector()

    def _filter_line(line):
        # Same check order as a nested `if`: is_arabic first, then isalpha.
        kept = (w for w in line.split() if detector.is_arabic(w) and w.isalpha())
        return ' '.join(kept)

    return '\n'.join(_filter_line(line) for line in text.splitlines())
コード例 #4
0
def keep_text_with_diacritics(text):
    """Keep only the Arabic words of *text* that carry diacritics.

    *text* is split on ``'\\n'``; in each line a word is kept when
    ``AlphabetDetector.is_arabic`` and ``has_diacritics`` are both truthy
    for it. Kept words are joined with single spaces, and lines with
    ``'\\n'``, so the number of lines is unchanged.
    """
    detector = AlphabetDetector()
    filtered_lines = []
    for raw_line in text.split('\n'):
        kept_words = [
            word
            for word in raw_line.split()
            if detector.is_arabic(word) and has_diacritics(word)
        ]
        filtered_lines.append(' '.join(kept_words))
    return '\n'.join(filtered_lines)
コード例 #5
0
ファイル: cleaner.py プロジェクト: motazsaad/shami-corpus
def clean_tweet(tweet):
    """Clean *tweet* and return its Arabic tokens joined by single spaces.

    Steps, in order: lower-case the text, run ``tweet_processor.clean`` with
    the URL, MENTION, HASHTAG, RESERVED and NUMBER options enabled, apply
    ``remove_diacritics`` and ``normalize_arabic``, filter the words through
    ``keep_only_arabic``, tokenize, and finally keep only the tokens that
    ``AlphabetDetector.is_arabic`` accepts.

    Args:
        tweet: Raw tweet text.

    Returns:
        A string of the surviving tokens separated by single spaces.
    """
    ad = AlphabetDetector()
    tweet_processor.set_options(tweet_processor.OPT.URL,
                                tweet_processor.OPT.MENTION,
                                tweet_processor.OPT.HASHTAG,
                                tweet_processor.OPT.RESERVED,
                                tweet_processor.OPT.NUMBER)
    tweet = tweet.lower()
    tweet = tweet_processor.clean(tweet)
    tweet = remove_diacritics(tweet)
    tweet = normalize_arabic(tweet)
    # keep_only_arabic is handed a list of words here; its result is passed
    # straight to tokenize, so it presumably returns a string - TODO confirm.
    tweet = keep_only_arabic(tweet.split())
    tokens = tokenize(tweet)
    # A previous pass mapped each token to itself whether or not it matched
    # emoticon_re; it had no effect on the tokens and has been dropped.
    return ' '.join(token for token in tokens if ad.is_arabic(token))
コード例 #6
0
def keep_only_arabic(words):
    """Join the entries of *words* detected as Arabic into one string.

    Entries for which ``AlphabetDetector.is_arabic`` is truthy are kept in
    order and separated by single spaces.
    """
    detector = AlphabetDetector()
    return ' '.join(filter(detector.is_arabic, words))