Esempi in Python per DictWithPWL.DictWithPWL, esempi in Python per enchant.DictWithPWL.DictWithPWL

Esempio n. 1

0

Mostra file

File: bot_units.py Progetto: bubuchacha/botster_nlu

class Bot:
    '''bot'''
    app = Klein()
    my_dict = DictWithPWL("en_US", "words.txt")
    chkr = SpellChecker(my_dict)

    def __init__(self):
        self.test = 'hi'

    @app.route('/parse', methods=['GET'])
    def parse(self, request):
        '''parser'''
        request.setHeader('Content-Type', 'application/json')
        request_params = {
            key.decode('utf-8', 'strict'): value[0].decode('utf-8', 'strict')
            for key, value in request.args.items()
        }
        text = str(request_params['q']).strip()
        self.chkr.set_text(text)

        for err in self.chkr:
            err.replace(get_best_word(self.chkr, err.word))

        spell_checked = self.chkr.get_text()
        resp = interpreter.parse(unicode(spell_checked, encoding="utf-8"))

        print(resp)

        if (float(resp['intent']['confidence']) > 0.5):
            reply = {"intent": resp['intent'], "entities": resp['entities']}
        else:
            reply = {"intent": {"name": "None"}, "entities": ""}

        return json.dumps(dict(reply), indent=4)

Esempio n. 2

0

Mostra file

File: enchantwrapper.py Progetto: sarutobi/outwiker

    def addCustomDict(self, customDictPath):
        try:
            self._createCustomDictLang(self._folders[-1])
        except IOError:
            pass

        key = (CUSTOM_DICT_LANG, customDictPath)

        if key not in self._dictCache:
            broker = Broker()
            broker.set_param('enchant.myspell.dictionary.path',
                             self._folders[-1])

            try:
                currentDict = DictWithPWL(CUSTOM_DICT_LANG,
                                          customDictPath,
                                          broker=broker)
            except enchant.errors.Error:
                return

            self._dictCache[key] = currentDict
        else:
            currentDict = self._dictCache[key]

        self._customCheckers.append(currentDict)

Esempio n. 3

0

Mostra file

    def addCustomDict(self, customDictPath):
        try:
            self._createCustomDictLang(self._folders[-1])
        except IOError as err:
            logger.error("Can't create custom dictionary")

        key = (CUSTOM_DICT_LANG, customDictPath)

        if key not in self._dictCache:
            broker = Broker()
            broker.set_param('enchant.myspell.dictionary.path',
                             self._folders[-1])

            try:
                currentDict = DictWithPWL(CUSTOM_DICT_LANG,
                                          customDictPath,
                                          broker=broker)
            except enchant.errors.Error as err:
                logger.error('Custom dictionary error. path={}; lang={}'.format(customDictPath, key))
                logger.error(err)
                return

            self._dictCache[key] = currentDict
        else:
            currentDict = self._dictCache[key]

        self._customCheckers.append(currentDict)

Esempio n. 4

0

Mostra file

File: po.py Progetto: rimrul/msgcheck

 def _get_language_checker(self, po_file, reports):
     """Get checker for PO file language."""
     checker = []
     if self.spelling:
         if not ENCHANT_FOUND:
             raise ImportError('Enchant module not found (please install '
                               '"pyenchant")')
         lang = po_file.props['language'] \
             if self.spelling == 'str' else 'en'
         try:
             with tempfile.NamedTemporaryFile() as tmp_file:
                 tmp_file.write(self.pwl.encode('utf-8'))
                 tmp_file.flush()
                 _dict = DictWithPWL(lang, tmp_file.name)
                 checker.append(SpellChecker(_dict))
         except DictNotFoundError:
             reports.append(
                 PoReport(
                     'enchant dictionary not found for language "{0}"'
                     ''.format(lang), 'dict', po_file.filename,
                     po_file.props['language_numline']))
             checker = []
         except IOError as exc:
             reports.append(
                 PoReport(str(exc), 'pwl', po_file.filename,
                          po_file.props['language_numline']))
             checker = []
     return checker

Esempio n. 5

0

Mostra file

def checkAllFiles() :
    lang = os.environ.get('POOTLE_LANG')
    spellCheckLang = os.environ.get('SPELLCHECK_LANG')
    if (os.environ.get('POOTLE_LANG') == None) :
        print "The POOTLE_LANG variable is not set!"
        print "Please set it with export POOTLE_LANG=hu before calling this script!"
        print "The variable should match with the language code on the pootle."
        return
    
    if (spellCheckLang == None) :
        print "The SPELLCHECK_LANG variable is not set!"
        print "Please set it with export SPELLCHECK_LANG=hu_HU before calling this script!"
        print "The variable should match with the language code on the pootle."
        return
    
    pwl = DictWithPWL(spellCheckLang, "known_words_database/" +lang+ ".txt")
    chkr = SpellChecker(pwl)

    with open("tools/packagelist.txt") as f:
        fileList = f.readlines()
    
    fileList = [x.strip() for x in fileList] 
    
    for filename in fileList :
        checkFile("translations/"+lang+"/" + filename, chkr)

Esempio n. 6

0

Mostra file

File: spelling.py Progetto: jon-turney/calm

def spellcheck_hints(args, packages):
    spelldict = DictWithPWL('en-US')
    chkr = SpellChecker(spelldict, filters=[DescFilter])
    misspellings = {}

    # add technical words not in spell-checking dictionary
    wordlist = []
    with open('words.txt') as f:
        for w in f:
            # strip any trailing comment
            w = re.sub(r'#.*$', '', w)
            # strip any whitespace
            w = w.strip()
            spelldict.add(w)
            wordlist.append(w.lower())
            # XXX: for the moment, to reduce the set of errors, ignore the fact
            # that words.txt gives a canonical capitalization, and accept any
            # capitalization
            spelldict.add(w.lower())
            spelldict.add(w.capitalize())

    # add all package names as valid words
    for p in packages:
        for w in re.split('[_-]', p):
            # remove punctuation characters
            w = re.sub(r'[+]', '', w)
            # strip off any trailing numbers
            w = re.sub(r'[\d.]*$', '', w)

            # both with and without any lib prefix
            for wl in [w, re.sub(r'^lib', '', w)]:
                # add the package name unless it exists in the list above, which
                # will give a canonical capitalization
                if wl.lower() not in wordlist:
                    spelldict.add(wl.lower())
                    spelldict.add(wl)
                    spelldict.add(wl.capitalize())

    # for each package
    for p in sorted(packages.keys()):
        # debuginfo packages have uninteresting, auto-generated text which
        # contains the package name
        if p.endswith('-debuginfo'):
            continue

        # spell-check the spell-checkable keys
        for k in ['sdesc', 'ldesc', 'message']:
            if k in packages[p].hints:
                chkr.set_text(packages[p].hints[k])
                # XXX: this is doing all the work to generate suggestions, which
                # we then ignore, so could be written much more efficiently
                for err in chkr:
                    # print("package '%s', hint '%s': Is '%s' a word?" % (p, k, err.word))
                    misspellings.setdefault(err.word, 0)
                    misspellings[err.word] += 1

    # summarize
    for c in sorted(misspellings, key=misspellings.get, reverse=True):
        print('%16s: %4d' % (c, misspellings[c]))

Esempio n. 7

0

Mostra file

    def _init_spell_checker(self):
        """
        Initialize spell checker dictionary
        """

        default_dict = "en_US"
        spell_dict = None

        jargonfile = self.params.get('jargonfile')
        if not jargonfile:
            jargonfile = os.environ.get('JARGONFILE')
        if jargonfile is not None:
            try:
                jargonfile = str(jargonfile)
                spell_dict = DictWithPWL(default_dict, jargonfile)
            except:
                self.error(
                    "Could not initialize dictionary using %s file" % jargonfile)

        if not spell_dict:
            try:
                spell_dict = DictWithPWL(default_dict)
            except:
                self.error(
                    "Could not initialize spell checker with dictionary %s" % default_dict)

            #Check if there is jargonfile on module repo
            url = ("https://src.fedoraproject.org/cgit/modules/%s.git/plain/jargon.txt" %
                   self.mmd.name)
            resp = requests.get(url)
            if resp.status_code >= 200 and resp.status_code < 300:
                for w in resp.content.split("\n"):
                    if w != '':
                        spell_dict.add_to_session(w)

        #add words from module name as jargon
        for w in self.mmd.name.split('-'):
            spell_dict.add_to_session(w)

        try:
            chkr = SpellChecker(spell_dict)
        except:
            self.error("Could not initialize spell checker")

        return chkr

Esempio n. 8

0

Mostra file

 def spellCheckHelper(self, row):
     count = 0
     tokenizer = RegexpTokenizer(r'\w+')
     # enchantDictionary = enchant.Dict("en_US")
     my_dict = DictWithPWL("en_US", "morewords.txt")
     my_checker = SpellChecker(my_dict)
     # use this tokenizer since it eliminates punctuation
     my_checker.set_text(row['essay'])
     return len(my_checker)

Esempio n. 9

0

Mostra file

File: cross_validation.py Progetto: adhikaridev/Automated-Essay-Marking

def spelling(text):
    my_dict = DictWithPWL("en_US", "myDict.txt")
    my_checker = SpellChecker(my_dict)
    my_checker.set_text(text)
    e = 0
    print '    Spelling errors: '
    for error in my_checker:
        print "              ", error.word
        e = e + 1
    return e

Esempio n. 10

0

Mostra file

File: Auto Correction of Spelling Errors.py Progetto: nadir453/Auto-Correction-of-Spelling-Errors-

def find_sug_words(evt):
    suget_wrd = ""
    levn_cost = 0
    dict, max = {}, 0
    tmp = 0
    wrd_lst_pair = ""
    len_mispld_wrd = 0
    len_corpus_wrd = 0
    target = ""
    source = ""
    columns = 0
    rows = 0
    m = 0

    #Word_selec.delete(1.0,END)
    value = wrd_lstbox.get(ANCHOR)

    #value = wrd_lstbox.get(wrd_lstbox .curselection())
    print("value", value)

    #Word_selec.insert(INSERT, value)
    #Word_selec.tag_add("start", "1.0", END)
    #Word_selec.tag_config("start",font=("Georgia", "12", "bold"), background="yellow")

    inpt_str = ''.join(Txt_input.get("1.0", END))

    word = Txt_input.get("1.0", END)
    inpt_txt = re.sub("[^\w]", " ", word).split()
    #print("Input_str",inpt_str)

    #new_str = inpt_str.replace("iterate",value)
    #Txt_input.delete(1.0,END)
    #Txt_input.insert(INSERT,inpt_str.replace("iterate",value))
    #print("New string",new_str)

    # Words suggestion for Missing Words

    d = DictWithPWL("en_US", "Word Dicitionary4.txt")
    print(d.check(value))

    chk_status = d.check(value)

    suggst_str = d.suggest(value)

    sugst_lstbx.delete(0, END)
    '''
    for sug in suggst_str:
        
        sugst_lstbx.insert(0,sug)

    #print("Sugest string",suggst_str)
        
    '''
    print("Listbox pressed")

Esempio n. 11

0

Mostra file

File: views.py Progetto: TarangKhanna/AutoEssayGrader

def get_spelling_error_count(essay):
    spelling_error_count = 0
    spelling_errors = []
    my_dict = DictWithPWL("en_US", "morewords.txt")
    chkr = SpellChecker(my_dict)
    chkr.set_text(essay)
    for err in chkr:
        spelling_error_count += 1
        spelling_errors.append(err.word)

    return spelling_error_count, list(set(spelling_errors))

Esempio n. 12

0

Mostra file

File: modules.py Progetto: cash2one/sitecheck

    def initialise(self, sitecheck):
        super(Spelling, self).initialise(sitecheck)

        # Spell checker must be re-created when check is resumed
        global _enchant_available
        if _enchant_available:
            ddp = os.path.dirname(os.path.abspath(__file__)) + 'dict.txt'
            cdp = self.sitecheck.session.root_path + 'dict.txt'

            if os.path.exists(cdp):
                self.dictionary = cdp
                d = DictWithPWL(self.language, cdp)
            elif os.path.exists(ddp):
                self.dictionary = ddp
                d = DictWithPWL(self.language, ddp)
            else:
                d = Dict(self.language)

            self.spell_checker = SpellChecker(d,
                                              filters=[EmailFilter, URLFilter])

Esempio n. 13

0

Mostra file

def test_DWPWL_empty(tmp_path):
    """Test functionality of DictWithPWL using transient dicts."""
    d = DictWithPWL("en_US", None, None)
    assert d.check("hello")
    assert not d.check("helo")
    assert not d.check("Flagen")
    d.add("Flagen")
    assert d.check("Flagen")
    d.remove("hello")
    assert not d.check("hello")
    d.add("hello")
    assert d.check("hello")

Esempio n. 14

0

Mostra file

def query_likelihood_35(query, query_id):
    spell_dict = DictWithPWL("en_US")
    spell_checker = SpellChecker(spell_dict)

    rank = 0
    QLM_35_dict = {}
    terms_in_query = query.split()
    doc_list = []
    queryStr = ""
    for term in terms_in_query:
        queryStr += term + " "
        # Enabling Spell checker to find typos in the query
        spell_checker.set_text(term)
        spell_list = []
        for error in spell_checker:
            spell_list = error.suggest(error.word)
        # Means a typo has been detected
        if len(spell_list) != 0:
            for word in spell_list:
                if word in dict_term_unigram_df:
                    term = word
                    break

        if term in dict_term_unigram_df.keys():
            str1 = dict_term_unigram_df[term]
            str2 = str1.split(",")

            for x in str2[:-1]:
                if x.strip() not in doc_list:
                    doc_list.append(x.strip())

    for x in doc_list:
        QLM_35 = calculate_score(query, x, query_id)
        QLM_35_dict.update({x: QLM_35})

    sorted_dict = sorted(QLM_35_dict.items(), key=operator.itemgetter(1))
    ranked_data = sorted_dict[::-1][0:100]

    file = open("Query_Likelihood_Model_0.35_Ranking_with_spell_checking.txt",
                'a')
    file.write(
        str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
            + "\n"))
    file.write("Query : " + str(queryStr) + "\n \n")
    for key, value in ranked_data:
        rank += 1
        temp_str = str(query_id) + " " + "Q0" + " " + " " + str(
            key) + " " + str(rank) + " " + str(value) + " " + "QLM35" + "\n"
        file.write(temp_str + "\n")
    file.write(
        str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
            + "\n"))
    file.close()

Esempio n. 15

0

Mostra file

    def setUp(self):
        """
        Verify required modulemd file parameter has been specified, exists,
        and can be loaded. The file name and loaded metadata are saved.
        """
        mdfile = self.params.get('modulemd')
        if mdfile is None:
            self.error("modulemd parameter must be supplied")

        mdfile = str(mdfile)
        if not os.path.isfile(mdfile):
            self.error("modulemd file %s must exist" % mdfile)

        try:
            mmd = modulemd.ModuleMetadata()
            mmd.load(mdfile)
        except Exception as ex:
            self.error(
                "There was an error while processing modulemd file %s: %s" %
                (mdfile, ex))

        # Infer the module name from the mdfile name and check that it is sane
        mdfileModuleName, mdfileExtension = os.path.basename(mdfile).split(
            '.', 1)
        if (mdfileExtension != 'yaml') and (mdfileExtension != 'yml'):
            self.error("modulemd file %s must have a .y[a]ml extension" %
                       mdfile)
        if mmd.name == '':
            # The name can be missing from the metadata because the builder
            # knows how to infer it
            mmd.name = mdfileModuleName
        elif mmd.name != mdfileModuleName:
            self.error(
                "modulemd file name %s and module name %s do not match" %
                (mdfileModuleName, mmd.name))

        self.mdfile = mdfile
        self.mmd = mmd

        try:
            jargonfile = self.params.get('jargonfile')
            if jargonfile is not None:
                jargonfile = str(jargonfile)
                dict = DictWithPWL("en_US", jargonfile)
                for w in self.mmd.name.split('-'):
                    dict.add_to_session(w)
                self.chkr = SpellChecker(dict)
            else:
                self.chkr = SpellChecker("en_US")
        except:
            self.error(
                "Could not initialize spell checker with dictionary %s" % dict)

Esempio n. 16

0

Mostra file

 def test_pwl(self):
     """Test checker loop with PWL."""
     from enchant import DictWithPWL
     d = DictWithPWL("en_US", None, None)
     txt = "I am sme text to be cheked with personal list of cheked words"
     chkr = SpellChecker(d, txt)
     for n, err in enumerate(chkr):
         if n == 0:
             self.assertEqual(err.word, "sme")
         if n == 1:
             self.assertEqual(err.word, "cheked")
             chkr.add()
     self.assertEqual(n, 1)

Esempio n. 17

0

Mostra file

def test_pwl():
    """Test checker loop with PWL."""
    from enchant import DictWithPWL

    d = DictWithPWL("en_US", None, None)
    txt = "I am sme text to be cheked with personal list of cheked words"
    chkr = SpellChecker(d, txt)
    for n, err in enumerate(chkr):
        if n == 0:
            assert err.word == "sme"
        if n == 1:
            assert err.word == "cheked"
            chkr.add()
    assert n == 1

Esempio n. 18

0

Mostra file

def myspell(fname):
    my_dict = DictWithPWL('en_US', 'mywords.txt')
    print(my_dict)

    spell_checker = SpellChecker(my_dict, filters=[EmailFilter, URLFilter])

    fp = open(fname, 'r')

    lc = 1
    for x in fp:
        spell_checker.set_text(x)
        for error in spell_checker:
            print("Error:", error.word, lc)
        lc = lc + 1

Esempio n. 19

0

Mostra file

File: BM25_Model_with_spell_check.py Progetto: surupachatterjee/Information_Retrieval_Project_Spring_2018

def calculate_score(query,doc_id,query_id):
    spell_dict = DictWithPWL("en_US")
    spell_checker = SpellChecker(spell_dict)

    terms_in_query = query.split()
    bm25_score=0
    relevance_docIds = relevance_doc_query(query_id)
    # R = len(relevance_docIds) # Total number of relevant documents for query.
    for term_in_query in terms_in_query:
        try:
            # Enabling Spell checker to find typos in the query
            spell_checker.set_text(term_in_query)
            spell_list = []
            for error in spell_checker:
                spell_list = error.suggest(error.word)
            # Means a typo has been detected
            if len(spell_list) != 0:
                for word in spell_list:
                    if word in dict_term_unigram_df:
                        term_in_query = word
                        break

            dl = dict_unigram_terms[doc_id]
            K = k1 * ((1-b) + (b * (dl/avdl)))
            # ri = relevance_doc_term(term_in_query, dict_term_unigram_df, relevance_docIds)
            if term_in_query in dict_term_unigram_df:
                num = dict_term_unigram_df[term_in_query].split(":")
            else:
                num = "0"
            ni = int(num[-1])
            str_fi = get_fi(term_in_query,doc_id)
            if(isinstance(str_fi, str)):
                fi = float(str_fi.strip(")"))
            else:
                fi=0
            qfi = terms_in_query.count(term_in_query)
            exp1 = (((float(ri) + 0.5) 
                / (float(R) - float(ri) + 0.5)) 
                / ((float(ni) - float(ri) + 0.5)
                / (float(N) - float(ni) - float(R) + float(ri) + 0.5)))
            exp2 = math.log(exp1)
            exp3 = (((float(k1) + 1) * float(fi)) / (float(K) + float(fi)))
            exp4 = (((float(k2) + 1) * float(qfi)) / (float(k2) + float(qfi)))
            temp_score = exp2*exp3*exp4
            bm25_score+=temp_score
        except Exception as e:
            print(traceback.format_exc())
            pass
    return bm25_score

Esempio n. 20

0

Mostra file

File: ocr.py Progetto: Diptojyoti/Project

def readAmount(imgPath, preprocess):

    #print(os.path.join(root, name))
    image = cv2.imread(imgPath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #Removing some noise
    kernel = np.ones((1, 1), np.uint8)
    image = cv2.dilate(image, kernel, iterations=1)
    image = cv2.erode(image, kernel, iterations=1)
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    #make a check to see if median blurring should be done to remove
    #noise
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # write the grayscale image to disk as a temporary file so we can
    # apply OCR to it
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    # load the image, apply OCR, and then delete
    # the temporary file
    Spellchecked = ''
    result = pytesseract.image_to_string(Image.open(filename))
    lines = result.split('\n')
    probableLines = matches(lines)
    #Spell check and auto-correct the extracted line
    if len(probableLines) > 0:
        from enchant.checker import SpellChecker
        chkr = SpellChecker(DictWithPWL("en_US", "num.txt"))
        chkr.set_text(probableLines)
        for err in chkr:
            sug = err.suggest()
            if len(sug) > 0:
                err.replace(sug[0])
        Spellchecked = chkr.get_text()
    words = Spellchecked.split(' ')
    #remove any unreadable characters
    star = '*'
    for word in words:
        if star in word:
            Spellchecked = Spellchecked.replace(word, ' ')
            break
    os.remove(filename)
    return (Spellchecked)

Esempio n. 21

0

Mostra file

File: BM25_Model_with_spell_check.py Progetto: surupachatterjee/Information_Retrieval_Project_Spring_2018

def calculate_BM25(query,query_id,query_enrichment, result_folder_path,file):
    spell_dict = DictWithPWL("en_US")
    spell_checker = SpellChecker(spell_dict)

    rank = 0;
    BM25_dict = {}
    terms_in_query = query.split()
    doc_list = []
    queryStr = ""
    for term in terms_in_query:
        queryStr+=term+" "
        # Enabling Spell checker to find typos in the query
        spell_checker.set_text(term)
        spell_list = []
        for error in spell_checker:
            spell_list = error.suggest(error.word)
        # Means a typo has been detected
        if len(spell_list) != 0:
            for word in spell_list:
                if word in dict_term_unigram_df:
                    term = word
                    break

        if term in dict_term_unigram_df.keys():
            str1 = dict_term_unigram_df[term]
            str2 = str1.split(",")
            for x in str2[:3-1]:
                if x.strip() not in doc_list:
                    doc_list.append(x.strip())

    for x in doc_list:
        BM25_score = calculate_score(query,x,query_id)

        BM25_dict.update({x:BM25_score})
        
    sorted_dict = sorted(BM25_dict.items(), key=operator.itemgetter(1))
    ranked_data = sorted_dict[::-1][0:100]
    
    file.write(str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"+"\n"))
    file.write("Query : "+str(queryStr)+"\n \n")
    for key,value in ranked_data:
        rank+=1;
        temp_str = str(query_id) + " " + "Q0" + " " + " " + str(key) + " " + str(rank) + " " + str(value) + " " + "BM25" + "\n"
        file.write(temp_str +"\n")
    file.write(str("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"+"\n"))

Esempio n. 22

0

Mostra file

def test_DWPWL(tmp_path, pwl_path):
    """Test functionality of DictWithPWL."""
    setPWLContents(pwl_path, ["Sazz", "Lozz"])
    other_path = tmp_path / "pel.txt"
    d = DictWithPWL("en_US", str(pwl_path), str(other_path))
    assert d.check("Sazz")
    assert d.check("Lozz")
    assert d.check("hello")
    assert not d.check("helo")
    assert not d.check("Flagen")
    d.add("Flagen")
    assert d.check("Flagen")
    assert "Flagen" in getPWLContents(pwl_path)
    assert "Flagen" in d.suggest("Flagn")
    assert "hello" in d.suggest("helo")
    d.remove("hello")
    assert not d.check("hello")
    assert "hello" not in d.suggest("helo")
    d.remove("Lozz")
    assert not d.check("Lozz")

Esempio n. 23

0

Mostra file

    def __init__(self, path, wl_dir, chunkers, filters):
        self.popath = path
        self.po = polib.pofile(path)
        self.lang = self.po.metadata["Language"]

        available_lang = Broker().list_languages()
        if self.lang not in available_lang:
            baselang = self.lang.split("_")[0]
            if baselang in available_lang:
                self.lang = baselang
            else:
                print("Dictionary for language '%s' could not be found." % self.lang)
                raise(errors.DictNotFoundError)

        wordlist = Check.get_wordlist(self.lang, wl_dir, path)
        try:
            check_dict = DictWithPWL(self.lang, pwl=wordlist)
        except errors.Error as e:
            check_dict = Dict(self.lang)
            print(e)
        self.checker = SpellChecker(check_dict, chunkers=chunkers, filters=filters)

Esempio n. 24

0

Mostra file

File: test_notebooks.py Progetto: yian2271368/SimpleITK-Notebooks

    def static_analysis(self, path):
        """
        Perform static analysis of the notebook.
        Read the notebook and check that there is no ouput and that the links
        in the markdown cells are not broken.
        Args:
            path (string): Name of notebook.
        Return:
            boolean: True if static analysis succeeded, otherwise False.
        """

        nb = nbformat.read(path, nbformat.current_nbformat)

        #######################
        # Check that the notebook does not contain output from code cells
        # (should not be in the repository, but well...).
        #######################
        no_unexpected_output = True

        # Check that the cell dictionary has an 'outputs' key and that it is
        # empty, relies on Python using short circuit evaluation so that we
        # don't get KeyError when retrieving the 'outputs' entry.
        cells_with_output = [
            c.source for c in nb.cells if 'outputs' in c and c.outputs
        ]
        if cells_with_output:
            no_unexpected_output = False
            print(
                'Cells with unexpected output:\n_____________________________')
            for cell in cells_with_output:
                print(cell + '\n---')
        else:
            print('no unexpected output')

        #######################
        # Check that all the links in the markdown cells are valid/accessible.
        #######################
        no_broken_links = True

        cells_and_broken_links = []
        for c in nb.cells:
            if c.cell_type == 'markdown':
                html_tree = document_fromstring(markdown.markdown(c.source))
                broken_links = []
                #iterlinks() returns tuples of the form (element, attribute, link, pos)
                for document_link in html_tree.iterlinks():
                    try:
                        if 'http' not in document_link[2]:  # Local file.
                            url = 'file://' + os.path.abspath(document_link[2])
                        else:  # Remote file.
                            url = document_link[2]
                        urlopen(url)
                    except URLError:
                        broken_links.append(url)
                if broken_links:
                    cells_and_broken_links.append((broken_links, c.source))
        if cells_and_broken_links:
            no_broken_links = False
            print('Cells with broken links:\n________________________')
            for links, cell in cells_and_broken_links:
                print(cell + '\n')
                print('\tBroken links:')
                print('\t' + '\n\t'.join(links) + '\n---')
        else:
            print('no broken links')

        #######################
        # Spell check all markdown cells and comments in code cells using the pyenchant spell checker.
        #######################
        no_spelling_mistakes = True
        simpleitk_notebooks_dictionary = DictWithPWL(
            'en_US',
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'additional_dictionary.txt'))
        spell_checker = SpellChecker(simpleitk_notebooks_dictionary,
                                     filters=[EmailFilter, URLFilter])
        cells_and_spelling_mistakes = []
        for c in nb.cells:
            spelling_mistakes = []
            if c.cell_type == 'markdown':
                # Get the text as a string from the html without the markup which is replaced by space.
                spell_checker.set_text(' '.join(
                    etree.XPath('//text()')(document_fromstring(
                        markdown.markdown(c.source)))))
            elif c.cell_type == 'code':
                # Get all the comments and concatenate them into a single string separated by newlines.
                comment_lines = re.findall('#+.*', c.source)
                spell_checker.set_text('\n'.join(comment_lines))
            for error in spell_checker:
                error_message = 'error: ' + '\'' + error.word + '\', ' + 'suggestions: ' + str(
                    spell_checker.suggest())
                spelling_mistakes.append(error_message)
            if spelling_mistakes:
                cells_and_spelling_mistakes.append(
                    (spelling_mistakes, c.source))
        if cells_and_spelling_mistakes:
            no_spelling_mistakes = False
            print('Cells with spelling mistakes:\n________________________')
            for misspelled_words, cell in cells_and_spelling_mistakes:
                print(cell + '\n')
                print('\tMisspelled words and suggestions:')
                print('\t' + '\n\t'.join(misspelled_words) + '\n---')
        else:
            print('no spelling mistakes')

        return (no_unexpected_output and no_broken_links
                and no_spelling_mistakes)

Esempio n. 25

0

Mostra file

File: spell_check.py Progetto: adhikaridev/Automated-Essay-Marking

from enchant import DictWithPWL
from enchant.checker import SpellChecker

my_dict = DictWithPWL("en_US", "myDict.txt")
my_checker = SpellChecker(my_dict)
with open('test_copy.txt', 'r') as f:
    f_contents = f.read().decode("utf-8-sig").encode(
        "utf-8")  #decode the contents to unicode and encode to utf-8
    my_checker.set_text(f_contents)
    e = 0
    for error in my_checker:
        print "ERROR:", error.word
        e = e + 1
    print('No. of errors: ', e)
'''
import enchant
import wx
from enchant.checker import SpellChecker
from enchant.checker.wxSpellCheckerDialog import wxSpellCheckerDialog
from enchant.checker.CmdLineChecker import CmdLineChecker


a = "Cats are animalss. " \
    "They are violenttt."
chkr = enchant.checker.SpellChecker("en_US")
chkr.set_text(a)
for err in chkr:
    print err.word
    sug = err.suggest()[0]
    err.replace(sug)

Esempio n. 26

0

Mostra file

File: __main__.py Progetto: joker234/potypo

def main():
    config = configparser.ConfigParser()
    config.read('setup.cfg')
    conf = config['potypo']

    chunker_list = []
    for chunker in conf['chunkers'].strip().split(","):
        if "." in chunker:
            components = chunker.rsplit('.', 1)
            mod = __import__(components[0], fromlist=[components[1]])
            class_object = getattr(mod, components[1])
        else:
            class_object = getattr(chunkers, chunker)

        chunker_list.append(class_object)

    filter_list = []
    for f in conf['filters'].strip().split(","):
        if "." in f:
            components = f.rsplit('.', 1)
            mod = __import__(components[0], fromlist=[components[1]])
            class_object = getattr(mod, components[1])
        else:
            class_object = getattr(filters, f)

        filter_list.append(class_object)

    if 'phrases' in conf:
        phrases = conf['phrases'].strip().split('\n')
        chunker_list.append(chunkers.make_PhraseChunker(phrases))

    if 'edgecase_words' in conf:
        words = conf['edgecase_words'].strip().split('\n')
        filter_list.append(filters.make_EdgecaseFilter(words))

    def errmsg(path, linenum, word):
        print("ERROR: {}:{}: {}".format(path, linenum, word))

    # checks contains one Check-Object for every po-file
    checks = []

    for root, dirs, files in os.walk(conf['locales_dir']):
        for f in files:
            if f.endswith(".po"):
                try:
                    checks.append(
                        Check(os.path.join(root, f), conf['wl_dir'],
                              chunker_list, filter_list))
                except errors.DictNotFoundError as err:
                    print(
                        err,
                        "Potypo will not check for spelling errors in this language."
                    )

    en_wordlist = Check.get_wordlist(conf['default_language'], conf['wl_dir'],
                                     conf['locales_dir'])
    en_dict = DictWithPWL(conf['default_language'], pwl=en_wordlist)
    en_ckr = SpellChecker(en_dict, chunkers=chunker_list, filters=filter_list)

    fail = False  # used for tracking whether failing errors occurred
    for c in checks:
        print("Checking Errors in file", c.popath, "for lang", c.lang)
        for entry in c.po:
            if entry.obsolete:
                continue

            en_ckr.set_text(entry.msgid)
            for err in en_ckr:
                fail = True
                path = os.path.relpath(c.popath,
                                       start=config['potypo']['locales_dir'])
                errmsg(path, entry.linenum, err.word)

            c.checker.set_text(entry.msgstr)
            for err in c.checker:
                if c.lang not in conf['no_fail']:
                    fail = True
                path = os.path.relpath(c.popath,
                                       start=config['potypo']['locales_dir'])
                errmsg(path, entry.linenum, err.word)

    print("Spell-checking done.")

    if fail:
        sys.exit(1)
    sys.exit(0)

Esempio n. 27

0

Mostra file

        lines = f.read().splitlines()

    # You better not have more than 1 word in a line
    for wrd in lines:
        if not enchant_dict.check(wrd):
            enchant_dict.add_to_pwl(wrd)


if __name__ == '__main__':

    args = parse_args()
    # print(args)

    thisdir = os.path.dirname(os.path.abspath(__file__))

    sitk_dict = DictWithPWL('en_US', thisdir + '/additional_dictionary.txt')

    if args.dict is not None:
        for d in args.dict:
            add_dict(sitk_dict, d)

    spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter])

    output_lvl = 1
    if args.brief:
        output_lvl = 0
    else:
        if args.verbose:
            output_lvl = 2
    if args.miss:
        output_lvl = -1

Esempio n. 28

0

Mostra file

File: dict.py Progetto: guilhermedelyra/competitive-programming

from enchant import DictWithPWL
my_dict = DictWithPWL("pt_BR")
for x in my_dict:
    print(x)

Esempio n. 29

0

Mostra file

File: analyzer.py Progetto: damiansalvia/cldas

# -*- encoding: utf-8 -*-
import freeling
import os
from enchant import DictWithPWL
from enchant.checker import SpellChecker
from difflib import get_close_matches, SequenceMatcher

DATA = "/usr/local/share/freeling/"
LANG = "es"

assert os.path.getsize('../utilities/es-lat') > 0
my_dict = DictWithPWL('es', '../utilities/es-lat')
assert my_dict.provider.name == 'aspell'
chkr = SpellChecker(my_dict)


class Analyzer:
    def __init__(self):

        freeling.util_init_locale("default")

        # Create options set for maco analyzer
        op = freeling.maco_options(LANG)
        op.PunctuationFile = DATA + "common/punct.dat"
        op.DictionaryFile = DATA + LANG + "/es-ar/dicc.src"
        op.AffixFile = DATA + LANG + "/afixos.dat"
        op.LocutionsFile = DATA + LANG + "/locucions.dat"
        op.NPdataFile = DATA + LANG + "/np.dat"
        op.QuantitiesFile = DATA + LANG + "/quantities.dat"
        op.ProbabilityFile = DATA + LANG + "/probabilitats.dat"

Esempio n. 30

0

Mostra file

def add_dict(enchant_dict, filename):
    with open(filename) as f:
        lines = f.read().splitlines()

    # You better not have more than 1 word in a line
    for wrd in lines:
        if not enchant_dict.check(wrd):
            enchant_dict.add_to_pwl(wrd)


if __name__ == '__main__':

    args = parse_args()
    # print(args)

    sitk_dict = DictWithPWL('en_US', 'additional_dictionary.txt')

    if args.dict is not None:
        for d in args.dict:
            add_dict(sitk_dict, d)

    spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter])

    output_lvl = 1
    if args.brief:
        output_lvl = 0
    else:
        if args.verbose:
            output_lvl = 2
    if args.miss:
        output_lvl = -1