Ejemplo n.º 1
0
def Pic_Book(x):
    """Draw the title from spreadsheet row *x* onto the book-cover template,
    resize it, and save it under the path stored in the sheet.

    Reads the module-level ``sheet`` (worksheet-like object):
      - cell (x, 4):  the title text to render
      - cell (x, 17): output path fragment, appended to "C:"

    Arabic titles are reshaped and bidi-reordered so glyphs join and display
    right-to-left; wrapped lines are emitted in reverse for them.
    """
    im1 = Image.open(r'C:\\Users\\user\\Desktop\\darlusail\\book.jpg')
    pic_cell = sheet.cell(x, 17).value
    title_cell = str(sheet.cell(x, 4).value)

    draw = ImageDraw.Draw(im1)
    # Font and color were previously defined twice; define them once.
    font = ImageFont.truetype('arial.ttf', size=10)
    color = 'rgb(0, 0, 0)'  # black

    # Reshape + reorder so Arabic letters connect and read right-to-left.
    txt = arabic_reshaper.reshape(title_cell)
    message = get_display(txt)

    wrapper = textwrap3.TextWrapper(width=20)
    word_list = wrapper.wrap(text=message)

    ad = AlphabetDetector()
    caption_new = ''
    # Both checks are independent `if`s (as in the original): a mixed-script
    # title that is detected as both Arabic and Latin appends both passes.
    if ad.is_arabic(txt) or not ad.is_latin(txt):
        # Non-Latin title: emit wrapped lines in reverse so the visual order
        # is correct after the bidi pass.
        print(" the name contains arabic words")
        caption_new += ''.join(line + '\n' for line in reversed(word_list))
    if ad.is_latin(txt):
        print(" the name doesn't contains arabic words")
        caption_new += ''.join(line + '\n' for line in word_list)
    print(caption_new)

    # Center horizontally, place at mid-height.
    w, h = draw.textsize(caption_new, font=font)
    W, H = im1.size
    x, y = 0.5 * (W - w), 0.5 * H
    draw.text((x, y), caption_new, fill=color, font=font)

    size = 563, 285  # final cover dimensions
    # NOTE(review): Image.ANTIALIAS was removed in Pillow 10; switch to
    # Image.LANCZOS (same filter) when upgrading.
    im1 = im1.resize(size, Image.ANTIALIAS)
    im1.save(r"C:" + pic_cell)  # save the image with the new configuration
Ejemplo n.º 2
0
    def process_word(word):
        """Normalize *word* for indexing.

        Lowercases, stems (Porter), and strips punctuation, digits and the
        '®' sign. Returns '' when the word should be discarded: non-Latin
        text, URL/image-file fragments, stopwords, or a degenerate word made
        of one repeated character.
        """
        ad = AlphabetDetector()
        if not ad.is_latin(word):
            return ''

        # Drop obvious URL / file-name fragments.
        banned_fragments = (
            '.html', 'http://', 'https://', '.jpg', '.svg', '.png'
        )
        if any(fragment in word for fragment in banned_fragments):
            return ''

        word = PorterStemmer().stem(word.lower())
        # One C-level pass deletes punctuation, digits and '®' (replaces the
        # previous chain of per-character .replace() calls).
        word = word.translate(str.maketrans('', '', punctuation + '0123456789®'))

        # Set membership instead of a linear scan over the stopword list.
        stop_words = set(stopwords.words('english')) | {'aa', 'aaa', 'bb', 'bbb'}
        if word in stop_words:
            return ''

        # Discard degenerate words like 'aaaa' (single repeated character).
        if len(set(word)) == 1:
            return ''

        return word
Ejemplo n.º 3
0
def parse_text(bot, update, user_data=None):
    """Telegram handler: tag a whole Kyrgyz text with Apertium and reply.

    Recognized words are echoed as ``word*analysis*``; unrecognized words are
    wrapped in underscores and reported separately afterwards. Latin input is
    rejected with a hint to enter Cyrillic text.
    """
    text = update.message.text  # renamed from `input` (shadowed the builtin)
    ad = AlphabetDetector()
    if ad.is_latin(text):
        update.message.reply_text(
            "Right now only cyrillic characters are supported :( \n"
            "Enter a Kyrgyz text:")
        return

    match_list = run_apertium_tagger(text, mode="text")
    output_list, error_list = [], []
    for word_match in match_list:
        if not word_match:  # skip empty matches (guard replaces nested else)
            continue
        # Apertium transliterates Cyrillic й as J/j; map it back for display.
        surface = str(word_match[1]).replace("J", "Й").replace("j", "й")
        if word_match[2] is not None:
            output_list.append(surface + "*" + word_match[2] + "*")
        else:
            output_list.append("_" + surface + "_")
            error_list.append(surface)

    update.message.reply_text(" ".join(output_list),
                              parse_mode=ParseMode.MARKDOWN)
    if error_list:
        update.message.reply_text(
            "⚠️ These words were not recognized by the parser ⚠️")
        for word in error_list:
            update.message.reply_text(word)
    update.message.reply_text("*-----*", parse_mode=ParseMode.MARKDOWN)
Ejemplo n.º 4
0
def parse_input(bot, update, user_data):
    """Telegram handler: tag a single Kyrgyz word and stash its stem.

    Stores ``user_data['stem'] = [stem_lowercase, 1, analysis]`` on success
    or ``[stem_lowercase, 0]`` when parsing failed, then prompts the user to
    look the stem up with /find. Latin input is rejected.
    """
    text = update.message.text  # renamed from `input` (shadowed the builtin)
    ad = AlphabetDetector()
    if ad.is_latin(text):
        update.message.reply_text(
            "Right now only cyrillic characters are supported :( \n"
            "Enter a Kyrgyz word:")
        return

    result = run_apertium_tagger(text)
    # Start a fresh stem record. (The original used setdefault and cleared
    # only a non-empty list; the net effect — an empty list bound to the
    # key — is identical.)
    user_data["stem"] = []
    # Apertium transliterates Cyrillic й as J/j; map it back for display.
    stem = str(result[2]).replace("J", "Й").replace("j", "й")
    user_data['stem'].append(stem.lower())

    if result[3] is not None:
        user_data['stem'].append(1)
        user_data['stem'].append(result[3])
        reply = stem + "*" + result[3] + "*"
        update.message.reply_text(reply, parse_mode=ParseMode.MARKDOWN)
        update.message.reply_text(
            "Look up stem in the dictionary? \n"
            "Press /find or enter next word to continue.")
    else:
        user_data['stem'].append(0)
        update.message.reply_text(stem)
        update.message.reply_text(
            "Parsing didn't work :( \n"
            "Look up stem in the dictionary? \n"
            "Press /find or enter next word to continue.")
Ejemplo n.º 5
0
def check_case(file):
    """Scan *file* line by line; report the first purely-alphabetic Latin
    word that is not entirely lowercase, or return "PASS" otherwise.
    """
    ad = AlphabetDetector()
    for line in file.readlines():
        # BUG FIX: readlines() keeps the trailing newline, which made
        # isalpha() return False for every line (except a final line with no
        # newline) — the check could essentially never fire. Strip it first.
        word = line.strip()
        if word.isalpha() and ad.is_latin(word) and not word.islower():
            print(word)
            return "Please repair the case to lowercase in " + file.name + "."
    return "PASS"
Ejemplo n.º 6
0
def drop_cols_not_in_mapping(df, mapper):
    """Drop DataFrame columns not covered by *mapper*.

    Hebrew headers are kept only when their cleaned form is a key of
    *mapper*; Latin headers only when they appear among *mapper*'s values.
    Returns the (possibly reduced) DataFrame.
    """
    ad = AlphabetDetector()
    # Hoisted: previously list(mapper.values()) was rebuilt for every column.
    mapped_names = set(mapper.values())
    for header in list(df.columns):
        # `not in mapper` is the direct key test (was: not in list(keys())).
        if ad.is_hebrew(header) and clean_text(header) not in mapper:
            df = drop_col_if_exists(df, header)

        if ad.is_latin(header) and header not in mapped_names:
            df = drop_col_if_exists(df, header)
    return df
Ejemplo n.º 7
0
def _test_caps_to_camel_case(a, b):
    """Assert helper: *b* must differ from *a* (same length) only when *a*
    is a Latin, fully upper-case string of length >= 4; in every other case
    the value must pass through unchanged.
    """
    ad = AlphabetDetector()
    if type(a) is not str:
        # Non-strings pass through untouched.
        assert a == b
    elif not ad.is_latin(a):
        # Non-Latin strings are not converted. (The `type(a) is str` re-check
        # in the original elif was redundant: a is known to be str here.)
        assert a == b
    elif len(a) < 4 or a.upper() != a:
        # Too short, or not fully upper-case: no conversion. (The original
        # `type(a) is str and len(a) < 4 or a.upper() != a` relied on `and`
        # binding tighter than `or`; with a known to be str it reduces to
        # exactly this condition.)
        assert a == b
    else:
        # Converted: same length, different content.
        assert len(a) == len(b)
        assert a != b
Ejemplo n.º 8
0
Archivo: utils.py Proyecto: zjgkkn/dash
def isLatin(string):
    """Return True when *string* consists of Latin characters; False for
    non-Latin text or when detection fails for any reason.
    """
    try:
        ad = AlphabetDetector()
        return ad.is_latin(string)
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        return False
Ejemplo n.º 9
0
# Convert an extracted text file into cleaned, sentence-joined paragraphs,
# write them as <name>.pdfdata next to the source, then delete the source.
#
# NOTE(review): relies on names defined elsewhere in this file: `file`,
# txt2paragraph, filter_non_printable, and `ad` (an AlphabetDetector).

# Compiled once instead of re-parsing inside the per-sentence loop.
# NOTE(review): `@-\\` inside the class is a character *range* (U+0040 to
# U+005C); presumably the author meant the literal characters '@', '-', '\'
# — confirm before touching the pattern (kept byte-identical here).
garbage_re = re.compile(
    r"([\s.!#?^@-\\\|\*,]+\w[\s.!#?^@-\\\|\*,]+)+\w[\s.!#?^@-\\\|\*,]+")
fraction_re = re.compile(r"\/\d+|\d+\/")

pre, ext = os.path.splitext(file)
with open(pre + ".pdfdata", 'w') as target_file:
    sentences = []
    for paragraph in txt2paragraph(file):
        # Normalize quotes/bullets/colons to spaces and map ?/! to '.' so
        # the sentence tokenizer sees uniform terminators.
        for old, new in (('"', ' '), ('”', ' '), ('“', ' '), ('•', ' '),
                         ('?', '.'), ('!', '.'), (';', ' '), (':', ' '),
                         ("’", "'")):
            paragraph = paragraph.replace(old, new)
        paragraph = ' '.join(paragraph.split())  # collapse all whitespace
        temp = nltk.sent_tokenize(paragraph)
        sentences.extend(temp)
        # Flush the buffer once it holds a multi-sentence run ending in '.'.
        if len(temp) > 1 and temp[-1].endswith('.'):
            paragraph = ""
            for sentence in sentences:
                sentence = filter_non_printable(sentence)
                # Keep only Latin sentences with no garbage runs and no
                # fraction-like digit/slash patterns.
                if (ad.is_latin(sentence)
                        and not garbage_re.search(sentence)
                        and not fraction_re.search(sentence)):
                    if paragraph != "":
                        paragraph += " "
                    paragraph += sentence
            # Fixed: `paragraph is not ""` compared object identity (a
            # SyntaxWarning since 3.8) and only worked via string interning.
            if paragraph != "":
                target_file.write(paragraph + "\n")
            sentences.clear()
os.remove(file)