def find_sug_words(evt):
    """Event handler: look up the word selected in the word listbox and
    load spelling suggestions for it into the suggestion listbox.

    evt -- Tkinter event object (unused; required by the bind() API).
    """
    # Word currently highlighted in the word listbox.
    value = wrd_lstbox.get(ANCHOR)
    print("value", value)

    # Full text of the input widget (kept for the replace logic that the
    # commented-out history shows is planned here).
    inpt_str = ''.join(Txt_input.get("1.0", END))
    # Tokenised copy of the input text; raw string avoids the invalid
    # escape-sequence warning the old "[^\w]" literal produced.
    inpt_txt = re.sub(r"[^\w]", " ", Txt_input.get("1.0", END)).split()

    # Dictionary augmented with a personal word list for suggestions.
    # NOTE(review): the PWL filename contains a typo ("Dicitionary") --
    # confirm it matches the file actually shipped before renaming it.
    d = DictWithPWL("en_US", "Word Dicitionary4.txt")
    print(d.check(value))

    chk_status = d.check(value)
    suggst_str = d.suggest(value)

    # Clear previous suggestions; the insertion loop is currently disabled
    # upstream, so the box is simply emptied for now.
    sugst_lstbx.delete(0, END)
    print("Listbox pressed")
Exemple #2
0
    def setUp(self):
        """
        Verify required modulemd file parameter has been specified, exists,
        and can be loaded. The file name and loaded metadata are saved.
        """
        mdfile = self.params.get('modulemd')
        if mdfile is None:
            self.error("modulemd parameter must be supplied")

        mdfile = str(mdfile)
        if not os.path.isfile(mdfile):
            self.error("modulemd file %s must exist" % mdfile)

        try:
            mmd = modulemd.ModuleMetadata()
            mmd.load(mdfile)
        except Exception as ex:
            self.error(
                "There was an error while processing modulemd file %s: %s" %
                (mdfile, ex))

        # Infer the module name from the mdfile name and check that it is sane
        mdfileModuleName, mdfileExtension = os.path.basename(mdfile).split(
            '.', 1)
        if (mdfileExtension != 'yaml') and (mdfileExtension != 'yml'):
            self.error("modulemd file %s must have a .y[a]ml extension" %
                       mdfile)
        if mmd.name == '':
            # The name can be missing from the metadata because the builder
            # knows how to infer it
            mmd.name = mdfileModuleName
        elif mmd.name != mdfileModuleName:
            self.error(
                "modulemd file name %s and module name %s do not match" %
                (mdfileModuleName, mmd.name))

        self.mdfile = mdfile
        self.mmd = mmd

        # Optional jargon file: words listed there, plus the hyphen-separated
        # components of the module name, are treated as correctly spelled.
        jargonfile = self.params.get('jargonfile')
        try:
            if jargonfile is not None:
                jargonfile = str(jargonfile)
                # Renamed from `dict` -- the original shadowed the builtin,
                # which also made the error message below print the builtin
                # type whenever DictWithPWL itself failed.
                pwl_dict = DictWithPWL("en_US", jargonfile)
                for w in self.mmd.name.split('-'):
                    pwl_dict.add_to_session(w)
                self.chkr = SpellChecker(pwl_dict)
            else:
                self.chkr = SpellChecker("en_US")
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; report the jargon file path, not the local.
            self.error(
                "Could not initialize spell checker with dictionary %s" % jargonfile)
Exemple #3
0
def checkAllFiles() :
    lang = os.environ.get('POOTLE_LANG')
    spellCheckLang = os.environ.get('SPELLCHECK_LANG')
    if (os.environ.get('POOTLE_LANG') == None) :
        print "The POOTLE_LANG variable is not set!"
        print "Please set it with export POOTLE_LANG=hu before calling this script!"
        print "The variable should match with the language code on the pootle."
        return
    
    if (spellCheckLang == None) :
        print "The SPELLCHECK_LANG variable is not set!"
        print "Please set it with export SPELLCHECK_LANG=hu_HU before calling this script!"
        print "The variable should match with the language code on the pootle."
        return
    
    pwl = DictWithPWL(spellCheckLang, "known_words_database/" +lang+ ".txt")
    chkr = SpellChecker(pwl)

    with open("tools/packagelist.txt") as f:
        fileList = f.readlines()
    
    fileList = [x.strip() for x in fileList] 
    
    for filename in fileList :
        checkFile("translations/"+lang+"/" + filename, chkr)
Exemple #4
0
class Bot:
    '''HTTP spell-checking front end for an NLU interpreter.

    Exposes a single Klein route (/parse) that spell-corrects the query
    text before handing it to the module-level `interpreter`.
    '''
    # Class-level state shared by all requests: the Klein app plus a spell
    # checker backed by a personal word list ("words.txt").
    app = Klein()
    my_dict = DictWithPWL("en_US", "words.txt")
    chkr = SpellChecker(my_dict)

    def __init__(self):
        self.test = 'hi'  # placeholder attribute; not used by parse()

    @app.route('/parse', methods=['GET'])
    def parse(self, request):
        '''Spell-correct the `q` query parameter, parse it with the NLU
        interpreter, and return the intent/entities as a JSON body.
        Falls back to a "None" intent when confidence is <= 0.5.
        '''
        request.setHeader('Content-Type', 'application/json')
        # Decode raw byte params to text; only the first value per key kept.
        request_params = {
            key.decode('utf-8', 'strict'): value[0].decode('utf-8', 'strict')
            for key, value in request.args.items()
        }
        text = str(request_params['q']).strip()
        self.chkr.set_text(text)

        # Replace every misspelling with the candidate chosen by
        # get_best_word (defined elsewhere in this module).
        for err in self.chkr:
            err.replace(get_best_word(self.chkr, err.word))

        spell_checked = self.chkr.get_text()
        # `unicode` implies this module targets Python 2.
        resp = interpreter.parse(unicode(spell_checked, encoding="utf-8"))

        print(resp)

        if (float(resp['intent']['confidence']) > 0.5):
            reply = {"intent": resp['intent'], "entities": resp['entities']}
        else:
            reply = {"intent": {"name": "None"}, "entities": ""}

        return json.dumps(dict(reply), indent=4)
Exemple #5
0
    def addCustomDict(self, customDictPath):
        """Attach a custom personal dictionary, one cached instance per path.

        Creates the custom-dictionary language files if needed, then builds
        (or reuses from `self._dictCache`) a DictWithPWL for
        `customDictPath` and appends it to the custom checkers.
        """
        try:
            self._createCustomDictLang(self._folders[-1])
        except IOError as err:
            # Best effort: log and continue -- DictWithPWL below may still
            # succeed with an existing language file.
            logger.error("Can't create custom dictionary")
            logger.error(err)

        key = (CUSTOM_DICT_LANG, customDictPath)

        if key not in self._dictCache:
            broker = Broker()
            broker.set_param('enchant.myspell.dictionary.path',
                             self._folders[-1])

            try:
                currentDict = DictWithPWL(CUSTOM_DICT_LANG,
                                          customDictPath,
                                          broker=broker)
            except enchant.errors.Error as err:
                # Bug fix: the lang placeholder used to be filled with the
                # whole (lang, path) cache key tuple, not the language code.
                logger.error('Custom dictionary error. path={}; lang={}'.format(customDictPath, CUSTOM_DICT_LANG))
                logger.error(err)
                return

            self._dictCache[key] = currentDict
        else:
            currentDict = self._dictCache[key]

        self._customCheckers.append(currentDict)
Exemple #6
0
    def addCustomDict(self, customDictPath):
        """Attach a custom personal dictionary, one cached instance per path.

        Silently ignores failures to create the language files and aborts
        quietly when the dictionary itself cannot be constructed.
        """
        try:
            self._createCustomDictLang(self._folders[-1])
        except IOError:
            pass

        cache_key = (CUSTOM_DICT_LANG, customDictPath)
        cachedDict = self._dictCache.get(cache_key)

        if cachedDict is None:
            dictBroker = Broker()
            dictBroker.set_param('enchant.myspell.dictionary.path',
                                 self._folders[-1])
            try:
                cachedDict = DictWithPWL(CUSTOM_DICT_LANG,
                                         customDictPath,
                                         broker=dictBroker)
            except enchant.errors.Error:
                return
            self._dictCache[cache_key] = cachedDict

        self._customCheckers.append(cachedDict)
Exemple #7
0
 def _get_language_checker(self, po_file, reports):
     """Get checker for PO file language.

     Returns a single-element list with a SpellChecker for the PO file's
     language, or an empty list when spell checking is disabled or setup
     failed (failures are appended to `reports` instead of raising).
     """
     checker = []
     if self.spelling:
         if not ENCHANT_FOUND:
             raise ImportError('Enchant module not found (please install '
                               '"pyenchant")')
         # NOTE(review): comparing self.spelling to the literal 'str' looks
         # suspicious -- confirm callers really set this flag to 'str'.
         lang = po_file.props['language'] \
             if self.spelling == 'str' else 'en'
         try:
             # self.pwl is an in-memory word list; dump it to a temp file
             # because DictWithPWL expects a file path.
             with tempfile.NamedTemporaryFile() as tmp_file:
                 tmp_file.write(self.pwl.encode('utf-8'))
                 tmp_file.flush()
                 _dict = DictWithPWL(lang, tmp_file.name)
                 checker.append(SpellChecker(_dict))
         except DictNotFoundError:
             # No enchant dictionary for this language: report, don't fail.
             reports.append(
                 PoReport(
                     'enchant dictionary not found for language "{0}"'
                     ''.format(lang), 'dict', po_file.filename,
                     po_file.props['language_numline']))
             checker = []
         except IOError as exc:
             # Temp-file / PWL trouble: report and disable checking.
             reports.append(
                 PoReport(str(exc), 'pwl', po_file.filename,
                          po_file.props['language_numline']))
             checker = []
     return checker
Exemple #8
0
 def spellCheckHelper(self, row):
     """Return the number of misspelled words in the row's 'essay' text.

     Uses en_US plus a personal word list ("morewords.txt"); iterating a
     SpellChecker yields one error per misspelling, so its len() is the
     error count.
     """
     # Dropped unused locals from the original: `count` and a
     # RegexpTokenizer that was constructed but never applied.
     my_dict = DictWithPWL("en_US", "morewords.txt")
     my_checker = SpellChecker(my_dict)
     my_checker.set_text(row['essay'])
     return len(my_checker)
def spelling(text):
    my_dict = DictWithPWL("en_US", "myDict.txt")
    my_checker = SpellChecker(my_dict)
    my_checker.set_text(text)
    e = 0
    print '    Spelling errors: '
    for error in my_checker:
        print "              ", error.word
        e = e + 1
    return e
Exemple #10
0
    def initialise(self, sitecheck):
        """(Re)build the spell checker for this module.

        Per the original note, the checker must be re-created when a check
        is resumed. Dictionary preference order: a site-local dict.txt,
        then a dict.txt next to this module, then the plain language dict.
        """
        super(Spelling, self).initialise(sitecheck)

        # Spell checker must be re-created when check is resumed
        global _enchant_available
        if _enchant_available:
            # Bug fix: bare concatenation produced "<moduledir>dict.txt"
            # with no separator, so this fallback path could never exist.
            ddp = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'dict.txt')
            # NOTE(review): root_path is concatenated directly -- this only
            # works if it already ends with a path separator; confirm.
            cdp = self.sitecheck.session.root_path + 'dict.txt'

            if os.path.exists(cdp):
                self.dictionary = cdp
                d = DictWithPWL(self.language, cdp)
            elif os.path.exists(ddp):
                self.dictionary = ddp
                d = DictWithPWL(self.language, ddp)
            else:
                d = Dict(self.language)

            self.spell_checker = SpellChecker(d,
                                              filters=[EmailFilter, URLFilter])
Exemple #11
0
def get_spelling_error_count(essay):
    """Spell-check `essay`; return (error count, unique misspelled words)."""
    checker = SpellChecker(DictWithPWL("en_US", "morewords.txt"))
    checker.set_text(essay)
    misspelled = [mistake.word for mistake in checker]
    # The count includes duplicates; the returned list is de-duplicated.
    return len(misspelled), list(set(misspelled))
Exemple #12
0
def query_likelihood_35(query, query_id):
    """Rank the top 100 documents for `query` with the 0.35 query
    likelihood model and append the run to the output file.

    Query typos are corrected against the module-level unigram index
    (dict_term_unigram_df) before scoring; scoring itself is delegated to
    calculate_score().
    """
    spell_dict = DictWithPWL("en_US")
    spell_checker = SpellChecker(spell_dict)

    rank = 0
    QLM_35_dict = {}
    terms_in_query = query.split()
    doc_list = []
    queryStr = ""
    for term in terms_in_query:
        queryStr += term + " "
        # Enabling Spell checker to find typos in the query
        spell_checker.set_text(term)
        spell_list = []
        for error in spell_checker:
            spell_list = error.suggest(error.word)
        # A typo was detected: use the first suggestion that actually
        # occurs in the index.
        if len(spell_list) != 0:
            for word in spell_list:
                if word in dict_term_unigram_df:
                    term = word
                    break

        # Membership test directly on the dict (no .keys() call needed).
        if term in dict_term_unigram_df:
            str1 = dict_term_unigram_df[term]
            str2 = str1.split(",")

            # The trailing comma-separated field is not a document id.
            for x in str2[:-1]:
                if x.strip() not in doc_list:
                    doc_list.append(x.strip())

    for x in doc_list:
        QLM_35 = calculate_score(query, x, query_id)
        QLM_35_dict.update({x: QLM_35})

    sorted_dict = sorted(QLM_35_dict.items(), key=operator.itemgetter(1))
    ranked_data = sorted_dict[::-1][0:100]

    # Context manager guarantees the file is closed, and the handle no
    # longer shadows the (Python 2) builtin `file`.
    with open("Query_Likelihood_Model_0.35_Ranking_with_spell_checking.txt",
              'a') as out_file:
        out_file.write(
            str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
                + "\n"))
        out_file.write("Query : " + str(queryStr) + "\n \n")
        for key, value in ranked_data:
            rank += 1
            temp_str = str(query_id) + " " + "Q0" + " " + " " + str(
                key) + " " + str(rank) + " " + str(value) + " " + "QLM35" + "\n"
            out_file.write(temp_str + "\n")
        out_file.write(
            str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
                + "\n"))
Exemple #13
0
 def test_pwl(self):
     """Test checker loop with PWL."""
     from enchant import DictWithPWL
     pwl_dict = DictWithPWL("en_US", None, None)
     sample = "I am sme text to be cheked with personal list of cheked words"
     checker = SpellChecker(pwl_dict, sample)
     for n, mistake in enumerate(checker):
         if n == 0:
             self.assertEqual(mistake.word, "sme")
         if n == 1:
             self.assertEqual(mistake.word, "cheked")
             # Once added, the second occurrence of "cheked" is accepted.
             checker.add()
     self.assertEqual(n, 1)
Exemple #14
0
def myspell(fname):
    """Spell-check the file `fname` line by line, printing each misspelled
    word together with its 1-based line number.

    E-mail addresses and URLs are filtered out before checking.
    """
    my_dict = DictWithPWL('en_US', 'mywords.txt')
    print(my_dict)

    spell_checker = SpellChecker(my_dict, filters=[EmailFilter, URLFilter])

    # Context manager closes the file (the original leaked the handle);
    # enumerate replaces the manual line counter.
    with open(fname, 'r') as fp:
        for lc, line in enumerate(fp, start=1):
            spell_checker.set_text(line)
            for error in spell_checker:
                print("Error:", error.word, lc)
    def __init__(self, mydict=None, lang='it_IT'):
        """[str] [,str]

        Takes an optional file of custom words to merge into the
        dictionary and the language to apply (Italian by default).
        Raises an exception if `mydict` is not accessible.
        """
        self._lang = lang
        self._custom_dict = mydict
        try:
            # E-mail addresses and URLs are never reported as misspelled.
            self._chkr = SpellChecker(lang, filters=[EmailFilter, URLFilter])
            # The personal word list is optional; None disables it.
            self._pwl = DictWithPWL(lang, mydict) if mydict else None
        except enchant.errors.DictNotFoundError as nodict_err:
            # Runtime message intentionally left in Italian (user-facing).
            raise SpellCheckError("Dizionario " + lang + " non trovato")
Exemple #16
0
def test_pwl():
    """Test checker loop with PWL."""
    from enchant import DictWithPWL

    pwl_dict = DictWithPWL("en_US", None, None)
    sample = "I am sme text to be cheked with personal list of cheked words"
    checker = SpellChecker(pwl_dict, sample)
    for idx, mistake in enumerate(checker):
        if idx == 0:
            assert mistake.word == "sme"
        if idx == 1:
            assert mistake.word == "cheked"
            # After add(), the second "cheked" is no longer reported.
            checker.add()
    assert idx == 1
def calculate_score(query,doc_id,query_id):
    """Compute the BM25 score of `doc_id` for `query`.

    Relies on module-level index state: dict_term_unigram_df,
    dict_unigram_terms, get_fi, relevance_doc_query and the constants
    k1, k2, b, avdl, N.
    """
    spell_dict = DictWithPWL("en_US")
    spell_checker = SpellChecker(spell_dict)

    terms_in_query = query.split()
    bm25_score=0
    relevance_docIds = relevance_doc_query(query_id)
    # R = len(relevance_docIds) # Total number of relevant documents for query.
    for term_in_query in terms_in_query:
        try:
            # Enabling Spell checker to find typos in the query
            spell_checker.set_text(term_in_query)
            spell_list = []
            for error in spell_checker:
                spell_list = error.suggest(error.word)
            # Means a typo has been detected
            if len(spell_list) != 0:
                for word in spell_list:
                    if word in dict_term_unigram_df:
                        term_in_query = word
                        break

            dl = dict_unigram_terms[doc_id]
            K = k1 * ((1-b) + (b * (dl/avdl)))
            # ri = relevance_doc_term(term_in_query, dict_term_unigram_df, relevance_docIds)
            if term_in_query in dict_term_unigram_df:
                num = dict_term_unigram_df[term_in_query].split(":")
            else:
                num = "0"
            ni = int(num[-1])
            str_fi = get_fi(term_in_query,doc_id)
            if(isinstance(str_fi, str)):
                fi = float(str_fi.strip(")"))
            else:
                fi=0
            qfi = terms_in_query.count(term_in_query)
            # NOTE(review): `ri` and `R` are only assigned in the
            # commented-out lines above. Unless they exist at module level,
            # this expression raises NameError for every term, the broad
            # except below swallows it, and bm25_score stays 0. Confirm
            # where ri/R are supposed to come from.
            exp1 = (((float(ri) + 0.5) 
                / (float(R) - float(ri) + 0.5)) 
                / ((float(ni) - float(ri) + 0.5)
                / (float(N) - float(ni) - float(R) + float(ri) + 0.5)))
            exp2 = math.log(exp1)
            exp3 = (((float(k1) + 1) * float(fi)) / (float(K) + float(fi)))
            exp4 = (((float(k2) + 1) * float(qfi)) / (float(k2) + float(qfi)))
            temp_score = exp2*exp3*exp4
            bm25_score+=temp_score
        except Exception as e:
            # Prints the traceback but deliberately continues with the
            # next query term.
            print(traceback.format_exc())
            pass
    return bm25_score
Exemple #18
0
def readAmount(imgPath, preprocess):
    """OCR the image at `imgPath` and return its spell-corrected text.

    preprocess -- "thresh" applies Otsu binarisation, "blur" applies a
    median blur; any other value leaves the grayscale image untouched.
    """

    #print(os.path.join(root, name))
    image = cv2.imread(imgPath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #Removing some noise
    # NOTE(review): `gray` was computed before this dilate/erode pass on the
    # colour image, so the denoising never affects the OCR input -- confirm
    # whether gray should be recomputed here.
    kernel = np.ones((1, 1), np.uint8)
    image = cv2.dilate(image, kernel, iterations=1)
    image = cv2.erode(image, kernel, iterations=1)
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    #make a check to see if median blurring should be done to remove
    #noise
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # write the grayscale image to disk as a temporary file so we can
    # apply OCR to it
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    # load the image, apply OCR, and then delete
    # the temporary file
    Spellchecked = ''
    result = pytesseract.image_to_string(Image.open(filename))
    lines = result.split('\n')
    probableLines = matches(lines)
    #Spell check and auto-correct the extracted line
    if len(probableLines) > 0:
        from enchant.checker import SpellChecker
        # NOTE(review): set_text() expects a string, but matches() appears
        # to return a sequence of lines -- confirm probableLines' type.
        chkr = SpellChecker(DictWithPWL("en_US", "num.txt"))
        chkr.set_text(probableLines)
        for err in chkr:
            sug = err.suggest()
            if len(sug) > 0:
                err.replace(sug[0])
        Spellchecked = chkr.get_text()
    words = Spellchecked.split(' ')
    #remove any unreadable characters
    # Replace the first word containing '*' (OCR garbage) with a space.
    star = '*'
    for word in words:
        if star in word:
            Spellchecked = Spellchecked.replace(word, ' ')
            break
    os.remove(filename)
    return (Spellchecked)
Exemple #19
0
	def __init__(self):
		"""Load the pickled index data (key weights, posting hash table,
		page ranks, titles) and build the spelling dictionary from the
		collection's key list."""
		# Shared data directory; context managers replace the manual
		# open/close pairs so the files close even if pickle.load raises.
		base = str(os.getcwd()) + "/Google_IITB/data/"

		with open(base + "keyweights.db", "rb") as f:
			self.keyweights = pickle.load(f)

		with open(base + "data.db", "rb") as f:
			self.hash_table = pickle.load(f)

		with open(base + "pageranks.db", "rb") as fp:
			self.ranks = pickle.load(fp)

		with open(base + "titles.db", "rb") as fp:
			self.titles = pickle.load(fp)

		self.d = DictWithPWL("en_US", base + "allkeys.txt")
Exemple #20
0
    def _init_spell_checker(self):
        """
        Initialize spell checker dictionary.

        Builds an en_US dictionary, optionally extended with a jargon file
        (the 'jargonfile' test parameter, the JARGONFILE env var, or a
        jargon.txt fetched from the module repo) plus the hyphen-separated
        components of the module name, and returns a SpellChecker over it.
        """

        default_dict = "en_US"
        spell_dict = None

        jargonfile = self.params.get('jargonfile')
        if not jargonfile:
            jargonfile = os.environ.get('JARGONFILE')
        if jargonfile is not None:
            try:
                jargonfile = str(jargonfile)
                spell_dict = DictWithPWL(default_dict, jargonfile)
            except Exception:
                # Narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt still propagate.
                self.error(
                    "Could not initialize dictionary using %s file" % jargonfile)

        if not spell_dict:
            try:
                spell_dict = DictWithPWL(default_dict)
            except Exception:
                self.error(
                    "Could not initialize spell checker with dictionary %s" % default_dict)

            #Check if there is jargonfile on module repo
            url = ("https://src.fedoraproject.org/cgit/modules/%s.git/plain/jargon.txt" %
                   self.mmd.name)
            resp = requests.get(url)
            if resp.status_code >= 200 and resp.status_code < 300:
                # NOTE(review): on Python 3, resp.content is bytes and
                # splitting on the str "\n" would raise; resp.text may be
                # intended -- confirm the target Python version.
                for w in resp.content.split("\n"):
                    if w != '':
                        spell_dict.add_to_session(w)

        #add words from module name as jargon
        for w in self.mmd.name.split('-'):
            spell_dict.add_to_session(w)

        try:
            chkr = SpellChecker(spell_dict)
        except Exception:
            self.error("Could not initialize spell checker")

        return chkr
def calculate_BM25(query,query_id,query_enrichment, result_folder_path,file):
    """Rank the top 100 documents for `query` with BM25 and append the run
    (TREC-style lines) to the already-open `file`.

    `query_enrichment` and `result_folder_path` are accepted for interface
    compatibility but are not used here. Query typos are corrected against
    the module-level unigram index before scoring.
    """
    spell_dict = DictWithPWL("en_US")
    spell_checker = SpellChecker(spell_dict)

    rank = 0
    BM25_dict = {}
    terms_in_query = query.split()
    doc_list = []
    queryStr = ""
    for term in terms_in_query:
        queryStr += term + " "
        # Enabling Spell checker to find typos in the query
        spell_checker.set_text(term)
        spell_list = []
        for error in spell_checker:
            spell_list = error.suggest(error.word)
        # Means a typo has been detected
        if len(spell_list) != 0:
            for word in spell_list:
                if word in dict_term_unigram_df:
                    term = word
                    break

        if term in dict_term_unigram_df:
            str1 = dict_term_unigram_df[term]
            str2 = str1.split(",")
            # Bug fix: the parallel query_likelihood_35() slices [:-1] to
            # drop the trailing non-document field; the [:3-1] (== [:2])
            # here looks like a typo that capped every posting list at two
            # documents.
            for x in str2[:-1]:
                if x.strip() not in doc_list:
                    doc_list.append(x.strip())

    for x in doc_list:
        BM25_score = calculate_score(query,x,query_id)
        BM25_dict.update({x:BM25_score})

    sorted_dict = sorted(BM25_dict.items(), key=operator.itemgetter(1))
    ranked_data = sorted_dict[::-1][0:100]

    file.write(str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"+"\n"))
    file.write("Query : "+str(queryStr)+"\n \n")
    for key,value in ranked_data:
        rank += 1
        temp_str = str(query_id) + " " + "Q0" + " " + " " + str(key) + " " + str(rank) + " " + str(value) + " " + "BM25" + "\n"
        file.write(temp_str +"\n")
    file.write(str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"+"\n"))
Exemple #22
0
    def __init__(self, path, wl_dir, chunkers, filters):
        """Load the PO file at `path` and build its spell checker.

        Falls back from a regional code (e.g. de_AT) to the base language,
        and from a PWL-backed dictionary to a plain one when the word list
        cannot be used.
        """
        self.popath = path
        self.po = polib.pofile(path)
        self.lang = self.po.metadata["Language"]

        available_lang = Broker().list_languages()
        if self.lang not in available_lang:
            baselang = self.lang.split("_")[0]
            # Guard clause: give up only when the base language is missing too.
            if baselang not in available_lang:
                print("Dictionary for language '%s' could not be found." % self.lang)
                raise(errors.DictNotFoundError)
            self.lang = baselang

        wordlist = Check.get_wordlist(self.lang, wl_dir, path)
        try:
            check_dict = DictWithPWL(self.lang, pwl=wordlist)
        except errors.Error as e:
            check_dict = Dict(self.lang)
            print(e)
        self.checker = SpellChecker(check_dict, chunkers=chunkers, filters=filters)
Exemple #23
0
# -*- encoding: utf-8 -*-
import freeling
import os
from enchant import DictWithPWL
from enchant.checker import SpellChecker
from difflib import get_close_matches, SequenceMatcher

# Freeling installation prefix and analysis language.
DATA = "/usr/local/share/freeling/"
LANG = "es"

# Module-level spell checker: Spanish plus a Latin-American word list.
# The asserts guard against a missing/empty PWL file at import time and
# against a provider other than aspell being picked up.
assert os.path.getsize('../utilities/es-lat') > 0
my_dict = DictWithPWL('es', '../utilities/es-lat')
assert my_dict.provider.name == 'aspell'
chkr = SpellChecker(my_dict)


class Analyzer:
    """Sets up Freeling morphological analysis options.

    NOTE(review): this definition looks truncated by the source scrape --
    `op` is configured below but never used before the class ends.
    """
    def __init__(self):

        freeling.util_init_locale("default")

        # Create options set for maco analyzer
        op = freeling.maco_options(LANG)
        op.PunctuationFile = DATA + "common/punct.dat"
        op.DictionaryFile = DATA + LANG + "/es-ar/dicc.src"
        op.AffixFile = DATA + LANG + "/afixos.dat"
        op.LocutionsFile = DATA + LANG + "/locucions.dat"
        op.NPdataFile = DATA + LANG + "/np.dat"
        op.QuantitiesFile = DATA + LANG + "/quantities.dat"
        op.ProbabilityFile = DATA + LANG + "/probabilitats.dat"
class EnchantProxy(object):
    """Wrapper around the enchant library."""
    def __init__(self, mydict=None, lang='it_IT'):
        """[str] [,str]

        Takes an optional file of custom words to merge into the
        dictionary and the language to apply (Italian by default).
        Raises SpellCheckError when the dictionary for `lang` is missing.
        """
        self._lang = lang
        self._custom_dict = mydict
        try:
            # E-mail addresses and URLs are never reported as misspelled.
            self._chkr = SpellChecker(lang, filters=[EmailFilter, URLFilter])
            # The personal word list is optional; None disables it.
            self._pwl = DictWithPWL(lang, mydict) if mydict else None
        except enchant.errors.DictNotFoundError as nodict_err:
            # Runtime messages intentionally left in Italian (user-facing).
            raise SpellCheckError("Dizionario " + lang + " non trovato")

    def check(self, text, chunk_idx):
        """(str, int) -> list of `Error`

        Spell-check `text` and return a list of Error objects carrying the
        misspelled word and its suggestions. Words accepted by the
        personal dictionary (`self._pwl`), when defined, are skipped.
        `chunk_idx` identifies the chunk of text being processed.
        """
        errors = []
        self._chkr.set_text(text)
        for err in self._chkr:
            if self._pwl and self._pwl.check(err.word):
                continue
            error = Error(err.word, self._chkr.suggest(err.word), chunk_idx)
            error.context = text
            errors.append(error)
        return errors

    def upd_mydict(self, word):
        """(str)

        Add `word` to the personal dictionary (effective from the next
        call to `check`).

        **The addition only touches the IN-MEMORY personal dictionary;
        use `add_custom_words` to persist words to disk.**
        """
        if not self._pwl:
            return
        if self._pwl.is_added(word):
            raise SpellCheckError("Parola già esistente")
        self._pwl.add(word)

    def add_custom_words(self, words):
        """(list of str)

        Append the words in `words` to the on-disk custom dictionary.
        """
        if not self._custom_dict:
            raise SpellCheckError("Dizionario personalizzato non presente")
        # Bug fix: the original called .split() directly on the file object
        # (file objects have no split method) and leaked both handles.
        with codecs.open(self._custom_dict, encoding='utf-8') as fin:
            orig_words = fin.read().split("\n")
        orig_words.extend([w for w in words if w not in orig_words])
        with codecs.open(
            self._custom_dict, mode='w', encoding='utf-8'
        ) as fout:
            fout.write("\n".join(orig_words))
Exemple #25
0
        lines = f.read().splitlines()

    # You better not have more than 1 word in a line
    for wrd in lines:
        if not enchant_dict.check(wrd):
            enchant_dict.add_to_pwl(wrd)


if __name__ == '__main__':

    args = parse_args()
    # print(args)

    thisdir = os.path.dirname(os.path.abspath(__file__))

    # Base dictionary: en_US plus the project's additional word list.
    sitk_dict = DictWithPWL('en_US', thisdir + '/additional_dictionary.txt')

    # Merge in any extra dictionaries supplied on the command line.
    if args.dict is not None:
        for d in args.dict:
            add_dict(sitk_dict, d)

    spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter])

    # Output level: 0 = brief, 1 = default, 2 = verbose; --miss overrides
    # everything with -1 (presumably "print misses only" -- confirm in the
    # code that consumes output_lvl).
    output_lvl = 1
    if args.brief:
        output_lvl = 0
    else:
        if args.verbose:
            output_lvl = 2
    if args.miss:
        output_lvl = -1
from enchant import DictWithPWL
from enchant.checker import SpellChecker

# Spell-check test_copy.txt against en_US plus a personal word list,
# printing each misspelled word and a final count. The bare `print`
# statement below means this snippet targets Python 2; the decode/encode
# dance strips a UTF-8 BOM ("utf-8-sig") before checking.
my_dict = DictWithPWL("en_US", "myDict.txt")
my_checker = SpellChecker(my_dict)
with open('test_copy.txt', 'r') as f:
    f_contents = f.read().decode("utf-8-sig").encode(
        "utf-8")  #decode the contents to unicode and encode to utf-8
    my_checker.set_text(f_contents)
    e = 0
    for error in my_checker:
        print "ERROR:", error.word
        e = e + 1
    print('No. of errors: ', e)
'''
import enchant
import wx
from enchant.checker import SpellChecker
from enchant.checker.wxSpellCheckerDialog import wxSpellCheckerDialog
from enchant.checker.CmdLineChecker import CmdLineChecker


# Demo: replace every misspelling with its first suggestion.
# (Python 2 print statement below.)
a = "Cats are animalss. " \
    "They are violenttt."
chkr = enchant.checker.SpellChecker("en_US")
chkr.set_text(a)
for err in chkr:
    print err.word
    sug = err.suggest()[0]
    err.replace(sug)
Exemple #27
0
def main():
    """Spell-check every .po file under the configured locales directory.

    Reads the [potypo] section of setup.cfg, builds the configured
    chunkers/filters, checks each msgid against the default language and
    each msgstr against its own language, then exits with status 1 if any
    failing error occurred (0 otherwise).
    """
    config = configparser.ConfigParser()
    config.read('setup.cfg')
    conf = config['potypo']

    # Resolve chunkers: dotted names are imported dynamically, bare names
    # come from the bundled `chunkers` module.
    chunker_list = []
    for chunker in conf['chunkers'].strip().split(","):
        if "." in chunker:
            components = chunker.rsplit('.', 1)
            mod = __import__(components[0], fromlist=[components[1]])
            class_object = getattr(mod, components[1])
        else:
            class_object = getattr(chunkers, chunker)

        chunker_list.append(class_object)

    # Same resolution scheme for filters.
    filter_list = []
    for f in conf['filters'].strip().split(","):
        if "." in f:
            components = f.rsplit('.', 1)
            mod = __import__(components[0], fromlist=[components[1]])
            class_object = getattr(mod, components[1])
        else:
            class_object = getattr(filters, f)

        filter_list.append(class_object)

    if 'phrases' in conf:
        phrases = conf['phrases'].strip().split('\n')
        chunker_list.append(chunkers.make_PhraseChunker(phrases))

    if 'edgecase_words' in conf:
        words = conf['edgecase_words'].strip().split('\n')
        filter_list.append(filters.make_EdgecaseFilter(words))

    def errmsg(path, linenum, word):
        print("ERROR: {}:{}: {}".format(path, linenum, word))

    # checks contains one Check-Object for every po-file
    checks = []

    for root, dirs, files in os.walk(conf['locales_dir']):
        for f in files:
            if f.endswith(".po"):
                try:
                    checks.append(
                        Check(os.path.join(root, f), conf['wl_dir'],
                              chunker_list, filter_list))
                except errors.DictNotFoundError as err:
                    print(
                        err,
                        "Potypo will not check for spelling errors in this language."
                    )

    # Checker for the default (source) language, applied to every msgid.
    en_wordlist = Check.get_wordlist(conf['default_language'], conf['wl_dir'],
                                     conf['locales_dir'])
    en_dict = DictWithPWL(conf['default_language'], pwl=en_wordlist)
    en_ckr = SpellChecker(en_dict, chunkers=chunker_list, filters=filter_list)

    fail = False  # used for tracking whether failing errors occurred
    for c in checks:
        print("Checking Errors in file", c.popath, "for lang", c.lang)
        for entry in c.po:
            if entry.obsolete:
                continue

            en_ckr.set_text(entry.msgid)
            for err in en_ckr:
                fail = True
                # Consistency: use the `conf` alias like the rest of this
                # function instead of re-indexing config['potypo'].
                path = os.path.relpath(c.popath, start=conf['locales_dir'])
                errmsg(path, entry.linenum, err.word)

            c.checker.set_text(entry.msgstr)
            for err in c.checker:
                if c.lang not in conf['no_fail']:
                    fail = True
                path = os.path.relpath(c.popath, start=conf['locales_dir'])
                errmsg(path, entry.linenum, err.word)

    print("Spell-checking done.")

    if fail:
        sys.exit(1)
    sys.exit(0)
from enchant import DictWithPWL
# NOTE(review): enchant Dict objects are not documented as iterable --
# confirm this loop actually yields words rather than raising TypeError.
my_dict = DictWithPWL("pt_BR")
for x in my_dict:
    print(x)
Exemple #29
0
def test_DWPWL(tmp_path, pwl_path):
    """Test functionality of DictWithPWL.

    The PWL file is pre-seeded with two custom words and a second, empty
    exclude-list file lives in tmp_path. The asserts are order-dependent:
    later checks rely on the add/remove calls made above them.
    """
    setPWLContents(pwl_path, ["Sazz", "Lozz"])
    other_path = tmp_path / "pel.txt"
    d = DictWithPWL("en_US", str(pwl_path), str(other_path))
    # Custom PWL words and stock dictionary words are both accepted.
    assert d.check("Sazz")
    assert d.check("Lozz")
    assert d.check("hello")
    assert not d.check("helo")
    assert not d.check("Flagen")
    # add() persists the word to the PWL file and affects suggestions.
    d.add("Flagen")
    assert d.check("Flagen")
    assert "Flagen" in getPWLContents(pwl_path)
    assert "Flagen" in d.suggest("Flagn")
    assert "hello" in d.suggest("helo")
    # remove() excludes even stock dictionary words from checks/suggestions.
    d.remove("hello")
    assert not d.check("hello")
    assert "hello" not in d.suggest("helo")
    d.remove("Lozz")
    assert not d.check("Lozz")
Exemple #30
0
def test_DWPWL_empty(tmp_path):
    """DictWithPWL with no PWL files must fall back to transient word lists."""
    checker = DictWithPWL("en_US", None, None)
    # Baseline behaviour of the plain en_US dictionary.
    assert checker.check("hello")
    assert not checker.check("helo")
    assert not checker.check("Flagen")
    # Session-only additions take effect immediately.
    checker.add("Flagen")
    assert checker.check("Flagen")
    # Removing and re-adding a dictionary word round-trips cleanly.
    checker.remove("hello")
    assert not checker.check("hello")
    checker.add("hello")
    assert checker.check("hello")
    def static_analysis(self, path):
        """
        Perform static analysis of the notebook.
        Read the notebook and check that there is no output and that the links
        in the markdown cells are not broken.
        Args:
            path (string): Name of notebook.
        Return:
            boolean: True if static analysis succeeded, otherwise False.
        """

        nb = nbformat.read(path, nbformat.current_nbformat)

        #######################
        # Check that the notebook does not contain output from code cells
        # (should not be in the repository, but well...).
        #######################
        no_unexpected_output = True

        # Check that the cell dictionary has an 'outputs' key and that it is
        # empty, relies on Python using short circuit evaluation so that we
        # don't get KeyError when retrieving the 'outputs' entry.
        cells_with_output = [
            c.source for c in nb.cells if 'outputs' in c and c.outputs
        ]
        if cells_with_output:
            no_unexpected_output = False
            print(
                'Cells with unexpected output:\n_____________________________')
            for cell in cells_with_output:
                print(cell + '\n---')
        else:
            print('no unexpected output')

        #######################
        # Check that all the links in the markdown cells are valid/accessible.
        #######################
        no_broken_links = True

        cells_and_broken_links = []
        for c in nb.cells:
            if c.cell_type == 'markdown':
                html_tree = document_fromstring(markdown.markdown(c.source))
                broken_links = []
                #iterlinks() returns tuples of the form (element, attribute, link, pos)
                for document_link in html_tree.iterlinks():
                    try:
                        # NOTE(review): any link without the substring "http"
                        # (including mailto: and #anchor links) is treated as
                        # a local file path — confirm this is intended.
                        if 'http' not in document_link[2]:  # Local file.
                            url = 'file://' + os.path.abspath(document_link[2])
                        else:  # Remote file.
                            url = document_link[2]
                        # NOTE(review): the response object returned by
                        # urlopen is never closed, leaking a handle per link.
                        urlopen(url)
                    except URLError:
                        broken_links.append(url)
                if broken_links:
                    cells_and_broken_links.append((broken_links, c.source))
        if cells_and_broken_links:
            no_broken_links = False
            print('Cells with broken links:\n________________________')
            for links, cell in cells_and_broken_links:
                print(cell + '\n')
                print('\tBroken links:')
                print('\t' + '\n\t'.join(links) + '\n---')
        else:
            print('no broken links')

        #######################
        # Spell check all markdown cells and comments in code cells using the pyenchant spell checker.
        #######################
        no_spelling_mistakes = True
        # en_US dictionary augmented with the project word list that sits
        # next to this script.
        simpleitk_notebooks_dictionary = DictWithPWL(
            'en_US',
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'additional_dictionary.txt'))
        spell_checker = SpellChecker(simpleitk_notebooks_dictionary,
                                     filters=[EmailFilter, URLFilter])
        cells_and_spelling_mistakes = []
        for c in nb.cells:
            spelling_mistakes = []
            if c.cell_type == 'markdown':
                # Get the text as a string from the html without the markup which is replaced by space.
                spell_checker.set_text(' '.join(
                    etree.XPath('//text()')(document_fromstring(
                        markdown.markdown(c.source)))))
            elif c.cell_type == 'code':
                # Get all the comments and concatenate them into a single string separated by newlines.
                comment_lines = re.findall('#+.*', c.source)
                spell_checker.set_text('\n'.join(comment_lines))
            # Iterating the checker yields one error per misspelling;
            # suggest() with no argument suggests for the current error word
            # (pyenchant SpellChecker API).
            for error in spell_checker:
                error_message = 'error: ' + '\'' + error.word + '\', ' + 'suggestions: ' + str(
                    spell_checker.suggest())
                spelling_mistakes.append(error_message)
            if spelling_mistakes:
                cells_and_spelling_mistakes.append(
                    (spelling_mistakes, c.source))
        if cells_and_spelling_mistakes:
            no_spelling_mistakes = False
            print('Cells with spelling mistakes:\n________________________')
            for misspelled_words, cell in cells_and_spelling_mistakes:
                print(cell + '\n')
                print('\tMisspelled words and suggestions:')
                print('\t' + '\n\t'.join(misspelled_words) + '\n---')
        else:
            print('no spelling mistakes')

        return (no_unexpected_output and no_broken_links
                and no_spelling_mistakes)
Exemple #32
0
class Search:
	"""In-memory search engine over pre-built pickled indexes.

	__init__ loads keyword weights, the inverted index, page ranks and page
	titles from Google_IITB/data, plus an enchant dictionary whose personal
	word list holds every indexed key (used by spellCheck).
	NOTE(review): the code style (e.g. the commented-out ``print searchlist``
	and the reliance on ``filter()`` — see removeRepeats/search) suggests it
	was written for Python 2; verify before running under Python 3.
	"""
	# Class-level placeholders; each is replaced per-instance in __init__.
	ranks = {}
	keyweights = {}
	hash_table = {}
	titles = {}
	d = {}
		
	def __init__(self):
		# keyweights.db: hash -> list of (weight, keyword) pairs (see spellCheck).
		f = open(str(os.getcwd()) + "/Google_IITB/data/keyweights.db", "rb")
		self.keyweights = pickle.load(f)
		f.close()
		
		# data.db: the inverted index, hash -> list whose tail holds URLs (see search).
		f = open(str(os.getcwd()) + "/Google_IITB/data/data.db", "rb")
		self.hash_table = pickle.load(f)
		f.close()
		
		with open(str(os.getcwd()) + "/Google_IITB/data/pageranks.db","rb") as fp:
			self.ranks = pickle.load(fp)
			
		with open(str(os.getcwd()) + "/Google_IITB/data/titles.db","rb") as fp:
			self.titles = pickle.load(fp)
		
		# en_US dictionary extended with every indexed key, for spell correction.
		self.d = DictWithPWL("en_US", str(os.getcwd()) + "/Google_IITB/data/allkeys.txt")
	
	def swap(self, listOfUrls, i, j):
		"""Swap elements i and i-1 of listOfUrls in place.

		NOTE(review): the ``j`` parameter is never used; the method always
		swaps positions i and i-1 regardless of j.
		"""
		tmp = listOfUrls[i]
		listOfUrls[i] = listOfUrls[i-1]
		listOfUrls[i-1] = tmp
		return


	def hashFunc(self,key):
		"""Hash a string as the sum of its character ordinals."""
		hashout = 0
		for i in range(len(key)):
			hashout = hashout + ord(key[i])
		return hashout
	

	def findinKeyTable(self, key, Table):
		"""Return the in-bucket index of key in Table, or False if absent.

		NOTE(review): an index of 0 is indistinguishable from False for
		callers that compare with ``!= False`` (``0 != False`` is False in
		Python), so the first entry of a bucket can never be matched by
		spellCheck below.
		"""
		hashkey = self.hashFunc(key)
		if hashkey in Table:
			for i in range(len(Table[hashkey])):
				if Table[hashkey][i][1] == key:
					return i
			return False
		return False


	def spellCheck(self, word):
		"""Return True if word is correctly spelled, else the best-weighted
		known suggestion (or False when no suggestion qualifies)."""
		if self.d.check(word) == True:
			return True
		else:
			# NOTE(review): suggest() may return an empty list, in which
			# case ``suggest[0]`` below raises IndexError.
			suggest = self.d.suggest(word)
			for i in range(len(suggest)):
				suggest[i] = suggest[i].lower()
			#keyweights = getKeyWeights()
			bestweight = 0
			bestword = suggest[0]
			for entry in suggest:
				hashkey = self.hashFunc(entry)
				secKey = self.findinKeyTable(entry, self.keyweights)
				if secKey != False:
					if self.keyweights[hashkey][secKey][0] >= bestweight:
						bestword = self.keyweights[hashkey][secKey][1]
						bestweight = self.keyweights[hashkey][secKey][0]
				# NOTE(review): this return sits INSIDE the loop, so only the
				# first suggestion is ever scored — it looks like it was
				# meant to be dedented below the loop; confirm intent.
				return bestword
			return False
		

	def ngrams(self, word):
		"""Return every prefix of word of length 3 through len(word)."""
		Ngrams = []
		for i in range(3,len(word)+1):
			Ngrams.append(word[ : i])
		return Ngrams
	

	def exactQuery(self, entry):
		"""Split the query into whitespace-separated words (no n-grams)."""
		return entry.split()


	def Query(self, entry):
		"""Lower-case each query word and expand it into its prefixes."""
		words = entry.split() #words has to be returned somehow
		searchlist = []
		for i in range(len(words)):
			searchlist = searchlist + self.ngrams(words[i].lower())
		#print searchlist
		return searchlist
	
	
	def Sort(self, listOfUrls):
		"""Reorder listOfUrls by stored page rank, highest first.

		NOTE(review): this is a single pass of adjacent swaps, not a full
		sort — the result is only partially ordered.
		"""
		loc_ranks = []
		for url in listOfUrls:
			hashkey = self.hashFunc(url)
			for i in range(len(self.ranks[hashkey])):
				if self.ranks[hashkey][i][0] == url:
					loc_ranks.append(self.ranks[hashkey][i][1])
		for i in range(1,len(listOfUrls)):
			if loc_ranks[i] > loc_ranks[i-1]:
				self.swap(listOfUrls, i, i-1)
		return listOfUrls
	
	def removeRepeats(self, result):
		"""Zero-out duplicate entries, then filter the zeros away.

		NOTE(review): under Python 3, filter() returns a lazy iterator, and
		search() later calls len() on this value — that would raise
		TypeError there; under Python 2 it returns a list.
		"""
		for i in range(len(result)):
			for j in range(i):
				if result[i] == result[j]:
					result[i] = 0
		resultFinal = filter(lambda a: a != 0, result)
		return resultFinal
	

	def primarySort(self, result, matches):
		"""One adjacent-swap pass ordering result by match count, then dedupe."""
		for i in range(1,len(result)):
			if matches[i] > matches[i-1]:
				self.swap(result, i, i-1)
		resultSort = self.removeRepeats(result)
		return resultSort


	def findin(self, key, query, table):
		"""Return the in-bucket index of query under hash key, or -1."""
		for i in range(len(table[key])):
			if table[key][i][0] == query:
				return i
		return -1

	def numberOfMatches(self, url, result):
		"""Count how many times url occurs in result."""
		Count = 0
		for link in result:
			if url == link:
				Count = Count + 1
		return Count

	
	def search(self, query):
		"""Look up every prefix of every query word in the inverted index and
		return [url, title] pairs, ranked by match count and page rank.
		Returns [] as soon as any prefix is missing from the index."""
		result = []
		searchlist = self.Query(query)
		for i in range(len(searchlist)):
			key = self.hashFunc(searchlist[i])
			secKey = self.findin(key, searchlist[i], self.hash_table)
			if secKey == -1:
				return []
			# Element 0 of the bucket entry is the key itself; the tail is URLs.
			temp = self.hash_table[key][secKey][1 :]
			for j in range(len(temp)):
				#if Find(result, temp[j]) == 0:
				result.append(temp[j])
		matches = []
		#before sorting, first sort according to no. of matches
		for link in result:
			matches.append(self.numberOfMatches(link, result))
		result = self.Sort(result)
		# NOTE(review): primarySort returns removeRepeats' filter() value,
		# which is not a sequence on Python 3 — len()/indexing below would
		# fail there.
		result = self.primarySort(result, matches)
		final = [[] for i in range(len(result))]
		for i in range(len(result)):
			try:
				# Fall back to the URL itself when no title is recorded.
				if self.titles[result[i]] == '': final[i] = [result[i], result[i]]
				else: final[i] = [result[i], self.titles[result[i]]]
			except:
				final[i] = [result[i], result[i]]
		return final
		
		
	def searchWSC(self, query):
		"""Search with spell correction: returns a dict with the (possibly
		'+'-joined corrected) query, a change flag, and the search results.

		NOTE(review): spellCheck can return False, which would be stored
		into searchlist; the actual search still uses the ORIGINAL query,
		not the corrected words; and ``i is not (len(searchlist)-1)`` relies
		on int identity, which is implementation-defined — confirm all three.
		"""
		change = False
		searchlist = query.split()
		for i in range(len(searchlist)):
			bestword = self.spellCheck(searchlist[i])
			if bestword != True: 
				searchlist[i] = bestword
				change = True
		changedEntry = ""
		result = self.search(query)
		if change:
			for i,word in enumerate(searchlist):
				if i is not (len(searchlist)-1): changedEntry = changedEntry + word + "+"
				else: changedEntry = changedEntry + word
		return {'change' : change,'query' : changedEntry , 'search' : result}
Exemple #33
0
def add_dict(enchant_dict, filename):
    """Merge the words listed in *filename* into *enchant_dict*'s
    personal word list, skipping words the dictionary already accepts.

    The file is expected to hold exactly one word per line.
    """
    with open(filename) as word_file:
        candidate_words = word_file.read().splitlines()

    for candidate in candidate_words:
        # Only unknown words are added, so re-running is harmless.
        if not enchant_dict.check(candidate):
            enchant_dict.add_to_pwl(candidate)


if __name__ == '__main__':

    args = parse_args()
    # print(args)

    # en_US dictionary augmented with the project's additional word list.
    sitk_dict = DictWithPWL('en_US', 'additional_dictionary.txt')

    # Fold any user-supplied extra dictionary files into the personal
    # word list (one word per line, see add_dict).
    if args.dict is not None:
        for d in args.dict:
            add_dict(sitk_dict, d)

    spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter])

    # Collapse the verbosity flags into one level: 0 = brief, 1 = default,
    # 2 = verbose.  args.miss overrides everything with -1 — presumably
    # "report misspellings only"; confirm against parse_args.
    output_lvl = 1
    if args.brief:
        output_lvl = 0
    else:
        if args.verbose:
            output_lvl = 2
    if args.miss:
        output_lvl = -1
Exemple #34
0
#coding UTF-8
import sys
import re
import string
import enchant
import time
import multiprocessing as mp
from functools import partial
from enchant import DictWithPWL
from enchant.checker import *
#import python_ginger_api as ginger
import language_check
import codecs

cleaned_text = ""  # base name for the ".gm_log" grammar log (appended to in eval_sentence)
d = DictWithPWL("en_US","dict")  # en_US dictionary plus the local "dict" personal word list
#chkr = SpellChecker("en_US")
chkr = SpellChecker(d)  # module-wide spell checker backed by the augmented dictionary
tool = language_check.LanguageTool('en-US')  # LanguageTool grammar checker, en-US rules

def eval_sentence(sentence,g="",cs=""):
    text = ""
    tot_sp = 0
    tot_gm = 0
    if g == "-g":
        if len(sentence) < 200:
            matches = tool.check(sentence)
            if len(matches) > 0:
                with open(cleaned_text+ ".gm_log","a+") as log_f:
                    for match in matches:
		    	if str(match.ruleId) != "WHITESPACE_RULE":