Beispiel #1
0
def letterSwap(word):
    '''
    Normalise look-alike letters in ``word`` to a single script.

    Every Latin letter that has a visually identical Cyrillic twin is first
    swapped for that twin. If the result consists only of Cyrillic
    characters it is returned. Otherwise every look-alike Cyrillic letter in
    that result (including letters that were already Cyrillic in the input)
    is swapped for its Latin twin, and that string is returned.
    '''

    ad = AlphabetDetector()

    # Latin letter -> visually identical Cyrillic letter.
    latin_like_cyr = {'a': 'а', 'c': 'с', 'e': 'е', 'o': 'о', 'p': 'р',
                      'y': 'у', 'A': 'А', 'B': 'В', 'C': 'С', 'E': 'Е',
                      'H': 'Н', 'K': 'К', 'M': 'М', 'O': 'О', 'P': 'Р',
                      'T': 'Т', 'X': 'Х'}

    # Translation tables for both directions; the two letter sets are
    # disjoint, so one translate pass equals the chain of replacements.
    to_cyrillic = str.maketrans(latin_like_cyr)
    to_latin = str.maketrans(
        {cyr: lat for lat, cyr in latin_like_cyr.items()})

    cyrillic_attempt = word.translate(to_cyrillic)
    if ad.only_alphabet_chars(cyrillic_attempt, 'CYRILLIC'):
        return cyrillic_attempt

    # Not purely Cyrillic: fall back to the Latin look-alikes.
    return cyrillic_attempt.translate(to_latin)
Beispiel #2
0
def cleanText(text):
    '''
    Repair mixed-script words in ``text`` and stem every word.

    ``text`` is iterated word by word. A word that is entirely Cyrillic or
    entirely Latin is kept as is; any other word is passed through
    letterSwap() and marks the text as broken. All resulting words are then
    stemmed with RussianStemmer.

    Returns a tuple ``(stemmed_words, is_broken)`` where ``is_broken`` is
    True if at least one word had to be repaired.
    '''

    detector = AlphabetDetector()
    stemmer = RussianStemmer()

    is_broken = False
    repaired = []

    # Phase 1: fix words that are neither purely Cyrillic nor purely Latin.
    for token in text:
        single_script = (detector.only_alphabet_chars(token, 'CYRILLIC')
                         or detector.only_alphabet_chars(token, 'LATIN'))
        if not single_script:
            is_broken = True
            token = letterSwap(token)
        repaired.append(token)

    # Phase 2: stem everything, repaired or not.
    stems = []
    for token in repaired:
        stems.append(stemmer.stem(token))

    return stems, is_broken
Beispiel #3
0
 def sameAlphabet(self,vLine):
   """Return True if at most one alphabet is detected in ``vLine``.

   ``vLine`` is decoded as UTF-8 before being handed to
   AlphabetDetector.detect_alphabet(); the result is True when that call
   yields zero or one entries, False otherwise.
   """
   detected = AlphabetDetector().detect_alphabet(vLine.decode('utf-8'))
   return len(detected) <= 1
Beispiel #4
0
def isLatin(string):
    """Report whether ``string`` is Latin according to AlphabetDetector.

    Returns whatever ``AlphabetDetector().is_latin(string)`` returns. The
    check is best-effort: if it raises an ordinary exception, False is
    returned instead of propagating the error.
    """
    try:
        ad = AlphabetDetector()
        return ad.is_latin(string)
    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still propagate instead of being reported as "not Latin".
        return False
Beispiel #5
0
import lxml.html
import re
from alphabet_detector import AlphabetDetector

# Alphabet detector instance created once for the whole module.
ad = AlphabetDetector()

# Module-level state. Names bound at module scope are already global, so no
# ``global`` declarations are needed here; functions that rebind them must
# declare ``global`` themselves.
dictionary = {}  # looked up by markdown()
text = ''
sent = ''

# Punctuation marks.
punct = [',', '!', '。', '”', ':', '?', '“', '……']

def markdown(i):
    """Build the ``<w>`` markup for the leading dictionary match of ``i``.

    Prefixes of ``i`` are tried in increasing length for as long as each one
    is a key of the module-level ``dictionary``; the last prefix that was
    still a key becomes ``mark``. The entry for ``mark`` is split into its
    even-indexed items (joined into ``transcr``) and odd-indexed items
    (joined into ``sem``).

    Raises:
        KeyError: if not even the first character of ``i`` is a key (``mark``
            is then the empty string) and ``''`` is not in ``dictionary``.
    """
    # Grow the prefix while it is still a key. The length bound matters:
    # slicing past the end keeps returning ``i`` itself, so without it a
    # string whose every prefix is a key would loop forever.
    ind = 1
    while ind <= len(i) and i[:ind] in dictionary:
        ind += 1
    mark = i[:ind - 1]

    # NOTE(review): presumably entries alternate transcription, translation,
    # transcription, ... -- confirm against the code that fills `dictionary`.
    markup = dictionary[mark]
    transcr = '; '.join(markup[::2])
    english = '; '.join(markup[1::2])

    # The transcr attribute needs an opening quote to be well-formed markup.
    markedstr = ('<w><ana lex="' + mark + '" transcr="' + transcr
                 + '" sem="' + english + '"/>' + mark + '</w>\n')
    # NOTE(review): `markedstr` is not used in the visible code and the
    # function appears to end abruptly after this declaration -- confirm
    # against the full source.
    global sent
Beispiel #6
0
import Cutiepii_Robot.modules.sql.locks_sql as sql
from Cutiepii_Robot import dispatcher, DRAGONS, LOGGER
from Cutiepii_Robot.modules.disable import DisableAbleCommandHandler
from Cutiepii_Robot.modules.helper_funcs.chat_status import (
    can_delete,
    is_user_admin,
    user_not_admin,
    is_bot_admin,
    user_admin,
)
from Cutiepii_Robot.modules.log_channel import loggable
from Cutiepii_Robot.modules.connection import connected

from Cutiepii_Robot.modules.helper_funcs.alternate import send_message, typing_action

# Module-level AlphabetDetector instance, constructed once at import time.
ad = AlphabetDetector()

LOCK_TYPES = {
    "audio":
    Filters.audio,
    "voice":
    Filters.voice,
    "document":
    Filters.document,
    "video":
    Filters.video,
    "contact":
    Filters.contact,
    "photo":
    Filters.photo,
    "url":