Beispiel #1
0
 def __init__(self, username, client_id, client_secret, redirect_url,user):
     """Store the Spotify credentials and reset all playback/search state.

     The final three statements call ``generate_token()``,
     ``generate_queue()`` and ``token.devices()`` in that order, so they are
     kept last and in sequence.
     """
     super(SpotipyModule, self).__init__()

     # Account credentials and OAuth settings supplied by the caller.
     self.username, self.client_id = username, client_id
     self.client_secret, self.redirect_url = client_secret, redirect_url
     # Space-separated list of OAuth scopes requested for the session.
     self.scope = (
         'user-library-read user-read-playback-state streaming'
         ' playlist-modify-public user-modify-playback-state playlist-modify-private '
         'user-read-playback-position user-read-currently-playing user-read-private'
     )
     self.user = user
     self.spellchecker = spellchecker.SpellChecker(language=u'en', distance=2)

     # Queue and playback bookkeeping, all empty until something is played.
     self.queue_id, self.playing, self.queue_uri = None, False, None
     self.queue_changed, self.current_queue = False, None

     # Playlist and search caches.
     self.playlist_ids = self.playlist_names = self.search_results = None

     # Metadata of the current track.
     self.dur_time, self.artist, self.title = 0, "", ""
     self.picture = qtc.QUrl()
     self.search_list = ListModel(SpotipyModule.SongWrapper)

     # Order matters below: devices are queried through the generated token.
     self.token = self.generate_token()
     self.queue = self.generate_queue()
     self.devices = self.token.devices()
Beispiel #2
0
    def __init__(self):
        """Load stop words, the word-cost table, a spell checker and month names.

        ``word_freq.txt`` is read from the current working directory and split
        on whitespace. Each word's cost is derived from its position in that
        file, so the file order is significant.

        Raises:
            FileNotFoundError: if ``word_freq.txt`` does not exist.
            ValueError: if ``word_freq.txt`` contains no words.
        """
        self.stop_words = frozenset(stopwords.words('english'))

        # Helper data for the hashtag calculation.
        # Read through a context manager so the file handle is closed promptly.
        with open("word_freq.txt") as word_file:
            self.words = word_file.read().split()
        # log(len(words)) does not depend on the loop variable; compute it once.
        log_word_count = log(len(self.words))
        self.wordcost = {
            word: log((rank + 1) * log_word_count)
            for rank, word in enumerate(self.words)
        }
        self.maxword = max(len(word) for word in self.words)
        self.spell = spellchecker.SpellChecker()

        # Month names and abbreviations mapped to two-digit month numbers, so
        # that all dates can be saved in the same format.
        self.month = {
            "jan": "01",
            "january": "01",
            "feb": "02",
            "february": "02",
            "mar": "03",
            "march": "03",
            "apr": "04",
            "april": "04",
            "may": "05",
            "jun": "06",
            "june": "06",
            "jul": "07",
            "july": "07",
            "aug": "08",
            "august": "08",
            "sep": "09",
            "september": "09",
            "october": "10",
            "oct": "10",
            "nov": "11",
            "november": "11",
            "dec": "12",
            "december": "12"
        }
Beispiel #3
0
 def __has_correct_spelling(word: str) -> "tuple[bool, str | None]":
     """Check *word* against the default spell-checker dictionary.

     Returns a ``(is_correct, suggestion)`` pair: ``(True, word)`` when the
     checker's best correction equals *word*, otherwise ``(False, correction)``.
     NOTE(review): recent pyspellchecker releases may return ``None`` from
     ``correction`` when no candidate exists; that value is passed through
     unchanged as the suggestion - confirm callers handle it.
     """
     # NOTE(review): a new SpellChecker is built on every call; consider
     # sharing one instance if this is called often.
     spell_checker = spellchecker.SpellChecker()
     corrected_word = spell_checker.correction(word)
     if corrected_word != word:
         return False, corrected_word
     return True, word
Beispiel #4
0
    def load_spellchecker(self):
        """
        Build ``self.spell`` and feed it extra known words.

        Two word sources are loaded in order: the extended dictionary bundled
        with the package, then the optional file named by the ``spelling_file``
        config key. A missing file in either place is reported through
        ``recoverable_error`` rather than raised directly here.
        """
        self.spell = spellchecker.SpellChecker(distance = 1)

        # 1) Built-in extended dictionary shipped as a package resource.
        try:
            with resource_stream(__name__, EXTENDED_DICTIONARY) as builtin_words:
                self.load_words_from_file(builtin_words)
        except FileNotFoundError as err:
            message = ("Couldn't load Dactyl built-in spelling file. " +
                       "Using the default dictionary only.")
            recoverable_error(message, self.config.bypass_errors, error=err)

        # 2) Optional project-level word list.
        custom_path = self.config.get("spelling_file", None)
        if custom_path:
            try:
                with open(custom_path, "r", encoding="utf-8") as custom_words:
                    self.load_words_from_file(custom_words)
            except FileNotFoundError as err:
                message = "Failed to load spelling_file %s: %s" % (custom_path, err)
                recoverable_error(message, self.config.bypass_errors, error=err)
        else:
            logger.debug("No spelling_file provided in config - skipping")
Beispiel #5
0
    def __init__(self, language: str) -> None:
        """Initialize self.

        :param language: language to check the spelling with
        :raises DictNotFound: if creating the spell checker for ``language``
            raises ``ValueError``
        """
        super().__init__(language)
        self._ignored: T.Set[str] = set()
        try:
            self._dict = spellchecker.SpellChecker(language=language)
        except ValueError as ex:
            # Chain explicitly so the original ValueError is reported as the
            # direct cause of the domain-specific error.
            raise DictNotFound(language) from ex
Beispiel #6
0
def correct_text():
    """Spell-check a sample text file against a custom dictionary and print timings.

    Loads the default English word list plus ``./my_korean_dictionary.json``,
    reads ``./Sample-2_0.txt``, and prints the best correction and all
    candidates for every whitespace-separated token the checker does not know.
    Returns early, after printing a message, if the text file is missing or
    cannot be decoded as UTF-8.
    """
    korean_dictionary_filepath = './my_korean_dictionary.json'
    text_filepath = './Sample-2_0.txt'

    # --- Build the checker -------------------------------------------------
    print('Start loading dictionary...')
    started = time.time()
    # The constructor loads the default word frequency list.
    spell = spellchecker.SpellChecker(language='en', distance=1)
    spell.word_frequency.load_dictionary(korean_dictionary_filepath,
                                         encoding='UTF-8')
    elapsed = time.time() - started
    print('End loading dictionary: {} secs.'.format(elapsed))

    # --- Read the text to check -------------------------------------------
    print('Start loading a text file...')
    started = time.time()
    try:
        with open(text_filepath, 'r', encoding='UTF-8') as fd:
            words = fd.read().split()
    except FileNotFoundError:
        print('File not found: {}.'.format(text_filepath))
        return
    except UnicodeDecodeError:
        print('Unicode decode error: {}.'.format(text_filepath))
        return
    elapsed = time.time() - started
    print('End loading a text file: {} secs.'.format(elapsed))

    # --- Report the words that may be misspelled ---------------------------
    misspelled = spell.unknown(words)

    print('Words = {}.'.format(words))
    print('Misspelled words = {}.'.format(misspelled))

    for word in misspelled:
        print("Start correcting a word, '{}'...".format(word))
        started = time.time()

        # The single most likely replacement.
        best = spell.correction(word)
        print('spell.correction({}) = {}.'.format(word, best))

        # Every likely replacement.
        options = spell.candidates(word)
        print('spell.candidates({}) = {}.'.format(word, options))

        elapsed = time.time() - started
        print('End correcting a word: {} secs.'.format(elapsed))
Beispiel #7
0
def simple_korean_example():
    """Walk through spell checking Korean words with extra word-frequency data.

    Everything is printed to stdout: corrections and candidates for a few
    sample words, a word probability, ``known()`` results after whitelisting
    words, and summary statistics of the word-frequency table.
    """
    # The constructor loads the default word frequency list.
    spell = spellchecker.SpellChecker(language='en', distance=2)

    # Manual switch: load a prepared JSON dictionary, or build the
    # frequencies from a raw text file instead.
    load_from_dictionary = True
    if load_from_dictionary:
        spell.word_frequency.load_dictionary('./my_korean_dictionary.json',
                                             encoding='UTF-8')
    else:
        spell.word_frequency.load_text_file('./korean_modern_novel_1_2.txt',
                                            encoding='UTF-8')

    # Words from the sample list that may be misspelled.
    samples = ['천재즈변', '학교', '도시관', '도소관', '요기']
    for word in spell.unknown(samples):
        # The single most likely replacement.
        best = spell.correction(word)
        print('spell.correction({}) = {}.'.format(word, best))

        # Every likely replacement.
        options = spell.candidates(word)
        print('spell.candidates({}) = {}.'.format(word, options))

    probability = spell.word_probability('학교')
    print("spell.word_probability('학교') = {}.".format(probability))

    # Alternative (disabled): feed an in-memory English text through
    # spell.word_frequency.load_text(...) to extend the frequency list.

    # Whitelist some words so they are not flagged as misspelled.
    frequency = spell.word_frequency
    frequency.load_words(['마이크로소프트', '애플', '구글'])
    known_korean = spell.known(['마이크로소프트', '구글', '페이스북'])
    print("spell.known(['마이크로소프트', '구글', '페이스북']) = {}.".format(
        known_korean))
    frequency.load_words(['microsoft', 'apple', 'google'])
    known_english = spell.known(['microsoft', 'google', 'facebook'])
    print("spell.known(['microsoft', 'google', 'facebook']) = {}.".format(
        known_english))

    # Summary statistics of the word-frequency table.
    dictionary_size = len(frequency.dictionary)
    print('len(spell.word_frequency.dictionary) = {}.'.format(dictionary_size))
    total = frequency.total_words
    print('spell.word_frequency.total_words = {}.'.format(total))
    unique = frequency.unique_words
    print('spell.word_frequency.unique_words = {}.'.format(unique))
    letter_count = len(frequency.letters)
    print('len(spell.word_frequency.letters) = {}.'.format(letter_count))
    longest = frequency.longest_word_length
    print('spell.word_frequency.longest_word_length = {}.'.format(longest))
Beispiel #8
0
def spellcheck_tokens(tokens, language="en", distance=1):
    '''Spell-check every word of every token list and return the result.

    The inner lists of ``tokens`` are corrected IN PLACE and the same
    ``tokens`` object is returned. A progress line is printed after every
    1000 sentences.

    Parameters
    ----------
    tokens   : iterable of mutable word lists (one list per sentence)
    language : the language to perform the spell-check in
    distance : maximum edit distance (1 or 2) used when searching for
               candidate corrections; the most frequent candidate is chosen.
               A distance of 1 is suggested to avoid odd corrections of
               longer words.
    '''
    spell = spellchecker.SpellChecker(language=language, distance=distance)

    for sentence_number, token in enumerate(tokens, start=1):
        for i, word in enumerate(token):
            corrected = spell.correction(word)
            # Newer pyspellchecker releases return None when no candidate is
            # found; keep the original word instead of storing None.
            token[i] = word if corrected is None else corrected
        if sentence_number % 1000 == 0:
            print(sentence_number, "sentences checked.", end="\r", flush=True)

    return tokens
def spelling_correction(text):
    """Normalise *text* and replace misspelled words with their best correction.

    Digit runs, commas and full stops are replaced by spaces and the text is
    lower-cased. Each word the spell checker does not know is then replaced,
    wherever it occurs as a substring, by the checker's top suggestion.
    If a normalisation step fails with ``AttributeError``/``TypeError`` (for
    example because *text* is not a string), the type of *text* is printed and
    processing continues with the value unchanged.
    """
    # Replace digit runs with a space and convert to lowercase.
    try:
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        text = re.sub(r"\d+", ' ', text).lower()
    except (AttributeError, TypeError):
        print(type(text))

    # Take out some punctuation that is difficult to map to sentiment.
    try:
        text = text.replace(',', ' ').replace('.', ' ')
    except (AttributeError, TypeError):
        print(type(text))

    # Correct spelling.
    spell = spellchecker.SpellChecker()
    misspelled = spell.unknown(word_tokenize(text))
    corrections = [(word, spell.correction(word)) for word in misspelled]
    for wrong, fixed in corrections:
        # Newer pyspellchecker releases return None when there is no
        # candidate; str.replace would raise TypeError on that, so skip it.
        if fixed is None:
            continue
        text = text.replace(wrong, fixed)

    return text
Beispiel #10
0
    def __init__(self, name):
        """Initialise the base dictionary and attach a pyspellchecker instance.

        :param name: forwarded unchanged to ``BasicDictionary.__init__``.
        """
        BasicDictionary.__init__(self, name)

        # self.name is passed as SpellChecker's first positional argument.
        # NOTE(review): self.name and self._customDict are presumably set up
        # by BasicDictionary.__init__ - confirm against the base class.
        self._dict = pyspellchecker.SpellChecker(self.name)
        # Add the custom words to the checker's word-frequency list.
        self._dict.word_frequency.load_words(self._customDict)
Beispiel #11
0
import nltk
from nltk.tokenize import word_tokenize
import nltk.corpus
from nltk.corpus import stopwords
import warnings
import pandas as pd
import string
from string import printable
import random
import time
import spellchecker


# Module-wide spell checker built with pyspellchecker's default arguments.
spell = spellchecker.SpellChecker()
# Extra words added to the checker's word list (presumably local place names
# that should not be reported as misspellings - confirm).
spell.word_frequency.load_words(['technion', 'haifa', 'ziv', 'neve', 'nave', 'shaanan'])
# NLTK resources are requested at import time, every time this module loads.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('gutenberg')
# Silences ALL warnings for the whole process, not only those of this module.
warnings.filterwarnings(action='ignore')

# Phrase tables. NOTE(review): names suggest they drive a chatbot's intent
# matching and canned replies; how they are matched is outside this view.
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey",)
WHATSAPP = ['hi how are you', 'how are you', "what's up", 'hi there', 'sup', 'how are you doing']
WHATSAPP_RESPONSES = ["I'm great! Thanks", 'Wonderful thanks!', 'Everything is great thanks!']
GREETING_RESPONSES = ["hi", "hey", "howdy", "hi there", "hello"]
THANKS_RESPONSES = ["You are welcome!", "Sure, no problem.", "Gladly", "Sure thing!", "Glad to assist."]
POSITIVE_INPUTS = ("yes", "sure", "ok", "yes please", "k", "cool", "great", 'ok cool', 'awesome', 'yup', 'yhh')
NEGATIVE_INPUTS = ("no", "no thanks", "na", "i'll pass", "forget it", "never mind",)
LIMIT_INPUTS = ("up to", "not more than", "maximum", "max")
# Extra stop words: the greeting replies and small-talk phrases above plus a
# list of filler words.
STOPWORDS_ADD_ON = GREETING_RESPONSES + WHATSAPP + ["find", "i", "need", "to", "please", "want", "apartment",
                                                    "something", "next", "would", "like", "lower", 'less', 'cheaper']
# NOTE(review): mixes room-related words with currency words ('nis',
# 'shekels') - confirm this is intentional.
ROOMS_WORD = ['bd', 'rooms', 'room', 'space', 'spaces', 'bed room', 'bed rooms', 'bedroom', 'bedrooms', 'nis', 'shekels']
Beispiel #12
0
def construct_korean_dictionary_example():
    """Build a word-frequency dictionary from a Korean text and export it.

    The frequencies come either directly from the raw text file (default) or,
    when the ``extract_nouns_with_konlpy`` switch below is turned on, from the
    nouns that konlpy's Okt tagger extracts from that file. The resulting
    dictionary is exported to ``./my_korean_dictionary.json`` and a few
    statistics are printed. Returns early, after printing a message, if the
    text file cannot be read in the konlpy path.
    """
    text_filepath = './korean_modern_novel_1_2.txt'
    dictionary_filepath = './my_korean_dictionary.json'

    # Expected content of my_korean_dictionary.json:
    #	{
    #		"가": 1,
    #		"나": 2,
    #		"사과": 45,
    #		"자전거": 60
    #	}
    # NOTE(review): export() below is called with gzipped=True although the
    # path ends in .json - confirm the loader used elsewhere handles that.

    # Manual switch: set to True to count only nouns extracted with konlpy.
    extract_nouns_with_konlpy = False

    text_data = None
    if extract_nouns_with_konlpy:
        # Imported lazily so konlpy is only required when this path is used.
        import konlpy

        print('Start loading a text file...')
        start_time = time.time()
        try:
            with open(text_filepath, 'r', encoding='UTF-8') as fd:
                text_data = fd.read()
        except FileNotFoundError:
            print('File not found: {}.'.format(text_filepath))
            return
        except UnicodeDecodeError:
            print('Unicode decode error: {}.'.format(text_filepath))
            return
        print('End loading a text file: {} secs.'.format(time.time() -
                                                         start_time))

        # TODO [check] >> Is it good to extract nouns or do POS tagging?
        print('Start preprocessing texts...')
        start_time = time.time()
        okt = konlpy.tag.Okt()
        text_data = okt.nouns(text_data)
        print('End preprocessing texts: {} secs.'.format(time.time() -
                                                         start_time))

        text_data = ' '.join(text_data)

    # Saving is identical for both paths; only the data source differs.
    print('Start saving a Korean dictionary...')
    start_time = time.time()
    spell = spellchecker.SpellChecker(language=None)
    if extract_nouns_with_konlpy:
        spell.word_frequency.load_text(text_data)
    else:
        spell.word_frequency.load_text_file(text_filepath, encoding='UTF-8')
    spell.export(dictionary_filepath, encoding='UTF-8', gzipped=True)
    print('End saving a Korean dictionary: {} secs.'.format(time.time() -
                                                            start_time))

    print('len(spell.word_frequency.dictionary) = {}.'.format(
        len(spell.word_frequency.dictionary)))
    print('spell.word_frequency.total_words = {}.'.format(
        spell.word_frequency.total_words))
    print('spell.word_frequency.unique_words = {}.'.format(
        spell.word_frequency.unique_words))
    print('len(spell.word_frequency.letters) = {}.'.format(
        len(spell.word_frequency.letters)))
    print('spell.word_frequency.longest_word_length = {}.'.format(
        spell.word_frequency.longest_word_length))
Beispiel #13
0
def simple_example():
    """Tour of the pyspellchecker API; every result is printed to stdout.

    Covers, in order: checking a small word list with explicit constructor
    arguments, extending the word-frequency list, whitelisting words, reading
    statistics from the frequency table, and changing the edit distance.
    """
    spell = spellchecker.SpellChecker(
        language='en',  # Supported languages: 'en', 'es', 'de', 'fr' and 'pt'. Defaults to 'en'.
        local_dictionary=None,  # Path to a local word frequency dictionary. If provided, no language will be loaded.
        distance=2,  # The edit distance to use. Defaults to 2.
        tokenizer=None,
        case_sensitive=False)

    # Words from the sample sentence that may be misspelled.
    for word in spell.unknown(['something', 'is', 'hapenning', 'here']):
        # The single most likely replacement.
        best = spell.correction(word)
        print('spell.correction({}) = {}.'.format(word, best))

        # Every likely replacement.
        options = spell.candidates(word)
        print('spell.candidates({}) = {}.'.format(word, options))

    probability = spell.word_probability('here')
    print("spell.word_probability('here') = {}.".format(probability))

    #--------------------
    # When the default word frequency list is not suitable, extra data can be
    # loaded to produce a list that fits the use case better.
    spell = spellchecker.SpellChecker()  # Loads default word frequency list.
    frequency = spell.word_frequency

    # Manual switch selecting where the extra data comes from:
    # 'dictionary', 'text_file' or 'text'.
    extra_source = 'text'
    if extra_source == 'dictionary':
        # Expected content of my_dictionary.json:
        #	{
        #		"a": 1,
        #		"b": 2,
        #		"apple": 45,
        #		"bike": 60
        #	}
        frequency.load_dictionary('./my_dictionary.json', encoding='UTF-8')
    elif extra_source == 'text_file':
        frequency.load_text_file('./my_text.txt', encoding='UTF-8')
    elif extra_source == 'text':
        text_data = "A blue whale went for a swim in the sea. Along it's path it ran into a storm. To avoid the storm it dove deep under the waves."
        frequency.load_text(text_data)

    # Whitelist some words so they are not flagged as misspelled.
    frequency.load_words(['microsoft', 'apple', 'google'])
    known_words = spell.known(['microsoft', 'google', 'facebook'])
    print("spell.known(['microsoft', 'google', 'facebook']) = {}.".format(
        known_words))

    # Summary statistics of the word-frequency table.
    dictionary_size = len(frequency.dictionary)
    print('len(spell.word_frequency.dictionary) = {}.'.format(dictionary_size))
    total = frequency.total_words
    print('spell.word_frequency.total_words = {}.'.format(total))
    unique = frequency.unique_words
    print('spell.word_frequency.unique_words = {}.'.format(unique))
    letter_count = len(frequency.letters)
    print('len(spell.word_frequency.letters) = {}.'.format(letter_count))
    longest = frequency.longest_word_length
    print('spell.word_frequency.longest_word_length = {}.'.format(longest))

    # text_data only exists when the 'text' source above was selected.
    tokens = list(frequency.tokenize(text_data))
    print('spell.word_frequency.tokenize(text_data)) = {}.'.format(tokens))

    # First 20 entries of each view on the frequency table.
    first_keys = [word for idx, word in enumerate(frequency.keys())
                  if idx < 20]
    print('spell.word_frequency.keys()) = {}.'.format(first_keys))
    first_words = [word for idx, word in enumerate(frequency.words())
                   if idx < 20]
    print('spell.word_frequency.words()) = {}.'.format(first_words))
    first_items = [word for idx, word in enumerate(frequency.items())
                   if idx < 20]
    print('spell.word_frequency.items()) = {}.'.format(first_items))

    #--------------------
    # For long words it is recommended to reduce the distance to 1.

    spell = spellchecker.SpellChecker(distance=1)  # Set at initialization.

    # Do some work on longer words.

    spell.distance = 2  # Set the distance parameter back to the default.
Beispiel #14
0
    r'\mu',
    r'\nu',
    r'\xi',
    r'\omicron',
    r'\pi',
    r'\rho',
    r'\sigma',
    r'\tau',
    r'\upsilon',
    r'\phi',
    r'\chi',
    r'\psi',
    r'\omega',
]
#
# Module-wide spell checker created with an edit distance of 1.
spell_checker = spellchecker.SpellChecker(distance=1)
# Remove the entries of bad_words_in_spellchecker from the checker's word
# list, then add the LaTeX Greek-letter commands and the extra special words
# (all three collections are defined earlier in the file, outside this view).
spell_checker.word_frequency.remove_words(bad_words_in_spellchecker)
spell_checker.word_frequency.load_words(greek_alphabet_latex_command)
spell_checker.word_frequency.load_words(extra_special_words)


# ---------------------------------------------------------------------------
#
# add program name to system error call
def sys_exit(msg, file_in=None, section_name=None):
    """Exit the program with *msg* prefixed by the program name.

    :param msg: error text to report
    :param file_in: optional input file name appended to the message
    :param section_name: optional section name; only appended when
        ``file_in`` is also given
    :raises SystemExit: always, carrying the assembled message
    """
    if file_in is not None:
        msg += '\nfile = ' + file_in
        if section_name is not None:
            msg += ', section = ' + section_name
    sys.exit('bin/extract_md.py:\n' + msg)
Beispiel #15
0
def callback_inline(call):
    """Handle an inline-keyboard callback, dispatching on ``call.data``.

    * ``"start"``    - offer the scan / text keyboard.
    * ``"help"``     - send the help text.
    * ``"scan"`` / ``"text"`` - ask the user for a photo.
    * ``"ncorrect"`` - drop this chat's stored entries from ``data.json``.
    * ``"correct"``  - spell-correct this chat's stored text, send it back
      and drop the entry from ``data.json``.
    """
    if call.data == "start":
        bot.send_message(call.message.chat.id,
                         "Convert into text or simply scan",
                         reply_markup=keyboards.keyboard_2)

    if call.data == "help":
        # Adjacent literals are concatenated without a separator, so each
        # piece carries its own trailing space.
        bot.send_message(
            call.message.chat.id,
            "Documentbot is a simple bot that does scan images from "
            "simple photos or docs. Spell checking with correcting is also "
            "provided, but recommended only with texts contain 10% of mistakes. "
            "Have fun:)")

    if call.data in ("scan", "text"):
        # NOTE(review): `key` is a local that is never read in this function;
        # presumably it was meant to update module-level state - confirm.
        key = 1 if call.data == "scan" else 2
        bot.send_message(
            call.message.chat.id,
            "OK. Send me a photo of a document. N/B! It has to be with sheet edges."
        )

    if call.data == "ncorrect":
        data = _load_chat_data()
        # Iterate over a copy: removing from the list being iterated would
        # skip the element that follows each removed one.
        for connect in list(data):
            if connect["id"] == call.message.chat.id:
                data.remove(connect)
                _save_chat_data(data)

    if call.data == "correct":
        bot.send_message(call.message.chat.id, "processing")
        spell = spellchecker.SpellChecker()
        data = _load_chat_data()
        for connect in list(data):
            if connect["id"] != call.message.chat.id:
                continue
            text = connect["text"] + " "
            print("TEXTTT \n" + text)

            # Tokenise per entry so words of one entry do not leak into the next.
            words = _split_words(text)
            print("all words" + str(words))
            wrong = spell.unknown(words)
            print("wrong words: " + str(wrong))
            text = _apply_corrections(text, wrong, spell)
            print("Corrected \n" + text)
            bot.send_message(call.message.chat.id,
                             "corrected text: \n" + text)

            data.remove(connect)
            _save_chat_data(data)


def _load_chat_data():
    """Read the stored chat entries from data.json, closing the file afterwards."""
    with open("data.json", encoding="utf8") as source:
        return json.load(source)


def _save_chat_data(data):
    """Write the chat entries back to data.json, closing the file afterwards."""
    with open("data.json", "w") as output:
        json.dump(data, output)


def _split_words(text):
    """Split text on spaces/newlines, dropping ``! ? . ,`` and empty words."""
    words = []
    current = []
    for char in text:
        if char in (" ", "\n"):
            # Only flush real words; consecutive separators add nothing.
            if current:
                words.append("".join(current))
                current = []
        elif char not in "!?.,":
            current.append(char)
    if current:
        words.append("".join(current))
    return words


def _apply_corrections(text, wrong, spell):
    """Replace each misspelled word in text by its correction followed by a space."""
    for w in wrong:
        if not w:
            continue
        low = w.lower()
        print(low)
        rep = spell.correction(low)
        # No candidate (None in newer pyspellchecker) or nothing to change.
        if rep is None or rep == low:
            continue
        # One non-overlapping pass: also replaces a match at index 0 and
        # cannot loop forever when the correction contains the misspelling.
        # NOTE(review): the trailing space after the correction is kept from
        # the original behaviour and can produce double spaces - confirm.
        text = text.replace(w, rep + " ")
    return text
# Read the command-line options, substituting defaults for missing ones.
args = parser.parse_args()

distance = 2 if args.distance is None else args.distance
language = 'en' if args.lang is None else args.lang
load = args.load
# Any supplied value for --predict is collapsed to 1; absent stays None.
predict = None if args.predict is None else 1

# Build the spell checker: the 'hi' option adds words from a local text file
# on top of the default constructor settings, otherwise the requested
# language is used.
if load != 'hi':
    spell = spellchecker.SpellChecker(language=language, distance=distance)
else:
    spell = spellchecker.SpellChecker(distance=distance)
    spell.word_frequency.load_text_file('./resources/hi.txt')

while 1:
    print(">>", end=' ')
    s = input()
    if s == 'quit':
        print("Bye!")
        break
    s = s.split()
    misspelled = spell.unknown(s)
    correct = s.copy()
    # print(misspelled)
    a = []