예제 #1
0
    def test_clear_caches_persistance(self):
        """Saved-then-cleared disk caches must not leak into a fresh instance.

        Seeds fabricated entries into the suggest/stem caches, persists and
        clears them, tears down the cache manager, then verifies that a new
        Hunspell built on the same disk cache dir starts with empty caches.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            h1 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')
            test_suggest = h1.suggest('testing')
            test_stem = h1.stem('testing')

            # Plant fabricated cache entries and confirm they are served back
            # (i.e. lookups hit the cache before calling into Hunspell).
            h1._suggest_cache['made-up'] = test_suggest
            self.assertEqual(h1.suggest('made-up'), test_suggest)
            h1._stem_cache['made-up'] = test_stem
            self.assertEqual(h1.stem('made-up'), test_stem)

            # Persist, then wipe the in-memory caches before dropping h1.
            h1.save_cache()
            h1.clear_cache()
            del h1

            # Tear down the shared cache manager so h2 cannot reuse state.
            cacheman = get_cache_manager('disk_hun')
            cacheman.deregister_all_caches()
            self.assertEqual(len(cacheman.cache_by_name), 0)

            h2 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')

            # The fabricated entries must not have survived the clear.
            self.assertEqual(len(h2._suggest_cache), 0)
            self.assertEqual(len(h2._stem_cache), 0)
            self.assertNotEqual(h2.suggest('made-up'), test_suggest)
            self.assertNotEqual(h2.stem('made-up'), test_stem)
        finally:
            shutil.rmtree(temp_dir) # Nuke temp content
예제 #2
0
def spell_corrector(df, lang1, lang2):
    """Spell-correct every sentence in df['L1'] using Hunspell.

    Args:
        df: pandas DataFrame with an 'L1' column of sentences (and an 'L2'
            column when lang2 is not None).
        lang1: language code for the 'L1' column (currently unused here).
        lang2: language code for the 'L2' column, or None for monolingual data.

    Returns:
        pandas.DataFrame: corrected sentences in column 'L1'; when lang2 is
        given, the original 'L2' column is carried over unchanged.
    """
    # Create an object of the Hunspell class
    h = Hunspell()
    print('I am spell_checker')
    # Holds the corrected sentences, later turned into a dataframe.
    corr_sent_list = {'L1': [], 'L2': []}
    for sent in df['L1']:
        # Collect corrected fragments and join once at the end (avoids
        # quadratic repeated string concatenation).
        corr_parts = []
        # Split on word boundaries so punctuation and spaces are preserved.
        for w in re.split(r'\b', sent):
            # Non-word fragment (punctuation, spaces) or already correct:
            # keep it as-is.
            if not w.isalpha() or h.spell(w):
                corr_parts.append(w)
            else:
                suggest = h.suggest(w)
                # TODO: rank the candidates (e.g. n-gram probability) to pick
                # the best one; for now the first suggestion is used.
                # BUG FIX: the original indexed suggest[0] unconditionally in
                # both branches, raising IndexError when Hunspell returned no
                # suggestions; fall back to the original word instead.
                corr_parts.append(suggest[0] if suggest else w)
        corr_sent_list['L1'].append(''.join(corr_parts))
    # Convert the corrected sentences into a pandas dataframe to return.
    if lang2 is not None:
        corr_sent_list['L2'].extend(list(df['L2']))
        return pd.DataFrame.from_dict(corr_sent_list)
    else:
        return pd.DataFrame(corr_sent_list['L1'], columns=['L1'])
class HunspellChecker(object):
    """Corrects single words and whitespace-tokenized text via Hunspell."""

    def __init__(self):
        self.checker = Hunspell()
        self.stopwords = set(SW.words("english")) | set(string.punctuation)

    def correct_word(self, word):
        """Return *word* if spelled correctly, else the top Hunspell
        suggestion; the word itself when there are no suggestions.

        Borrowed from:
        https://datascience.blog.wzb.eu/2016/07/13/autocorrecting-misspelled-words-in-python-using-hunspell/
        """
        if self.checker.spell(word):
            return word
        suggestions = self.checker.suggest(word)
        if suggestions:
            return suggestions[0]
        return word

    def correct_string(self, text, ensure_length=False):
        """Split *text* on whitespace and correct each non-stopword token."""
        corrected = []
        for token in text.split():
            if token in self.stopwords:
                corrected.append(token)
                continue
            correction = self.correct_word(token)
            # A multi-word suggestion would change the token count; keep only
            # the first piece when the caller asks to preserve length.
            corrected.append(correction.split()[0] if ensure_length else correction)
        return " ".join(corrected)
예제 #4
0
    def test_non_overlapping_caches(self):
        """Two Hunspell instances on different dictionaries must keep
        independent suggest/stem caches."""
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        # Plant fabricated entries in the first instance's caches and
        # confirm cached lookups serve them back.
        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        # A second instance (different dictionary) must not see them.
        h2 = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
        self.assertNotEqual(h2.suggest('made-up'), test_suggest)
        self.assertNotEqual(h2.stem('made-up'), test_stem)
예제 #5
0
class HunspellChecker(object):
    """Minimal Hunspell-backed single-word corrector."""

    def __init__(self):
        self.checker = Hunspell()

    def correct(self, word):
        """Return *word* if spelled correctly; otherwise the first Hunspell
        suggestion, falling back to the word itself when none exist."""
        # Idiom fix: rely on truthiness instead of comparing to True with ==.
        if self.checker.spell(word):
            return word
        res = self.checker.suggest(word)
        if res:
            return res[0]
        return word
예제 #6
0
def test_clear_caches_persistance(hunspell):
    """Saved-then-cleared disk caches must not resurface in a new instance
    once the cache manager has been fully deregistered."""
    temp_dir = tempfile.mkdtemp()
    try:
        h1 = Hunspell('test',
                      hunspell_data_dir=DICT_DIR,
                      disk_cache_dir=temp_dir,
                      cache_manager='disk_hun')
        test_suggest = h1.suggest('testing')
        test_suffix = h1.suffix_suggest('testing')
        test_stem = h1.stem('testing')

        # Plant fabricated entries in all three caches and confirm the
        # lookups are served from the cache.
        h1._suggest_cache['made-up'] = test_suggest
        assert h1.suggest('made-up') == test_suggest
        h1._suffix_cache['made-up'] = test_suffix
        assert h1.suffix_suggest('made-up') == test_suffix
        h1._stem_cache['made-up'] = test_stem
        assert h1.stem('made-up') == test_stem

        # Persist, then wipe the in-memory caches before dropping h1.
        h1.save_cache()
        h1.clear_cache()
        del h1

        # Tear down the shared cache manager so h2 starts from scratch.
        cacheman = get_cache_manager('disk_hun')
        cacheman.deregister_all_caches()
        assert len(cacheman.cache_by_name) == 0

        h2 = Hunspell('test',
                      hunspell_data_dir=DICT_DIR,
                      disk_cache_dir=temp_dir,
                      cache_manager='disk_hun')

        # The fabricated entries must not have survived the clear.
        assert len(h2._suggest_cache) == 0
        assert len(h2._stem_cache) == 0
        assert h2.suggest('made-up') != test_suggest
        assert h2.suffix_suggest('made-up') != test_suffix
        assert h2.stem('made-up') != test_stem
    finally:
        shutil.rmtree(temp_dir)  # Nuke temp content
예제 #7
0
def test_non_overlapping_caches(hunspell):
    """Cache entries injected into one Hunspell instance must not be visible
    to a second instance built on a different dictionary."""
    test_suggest = hunspell.suggest('testing')
    test_suffix = hunspell.suffix_suggest('testing')
    test_stem = hunspell.stem('testing')

    # Plant fabricated entries in each cache and confirm they are served back.
    hunspell._suggest_cache['made-up'] = test_suggest
    assert hunspell.suggest('made-up') == test_suggest
    hunspell._suffix_cache['made-up'] = test_suffix
    assert hunspell.suffix_suggest('made-up') == test_suffix
    hunspell._stem_cache['made-up'] = test_stem
    assert hunspell.stem('made-up') == test_stem

    # A fresh instance (different dictionary) must not see the planted entries.
    h2 = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
    assert h2.suggest('made-up') != test_suggest
    # Consistency fix: the suffix cache was seeded and verified above but its
    # isolation on h2 was never asserted, unlike in the sibling cache tests.
    assert h2.suffix_suggest('made-up') != test_suffix
    assert h2.stem('made-up') != test_stem
예제 #8
0
def test_clear_caches_non_peristance(hunspell):
    """clear_cache() without a prior save_cache() must leave nothing behind
    for a freshly constructed instance."""
    test_suggest = hunspell.suggest('testing')
    test_suffix = hunspell.suffix_suggest('testing')
    test_stem = hunspell.stem('testing')

    # Plant fabricated entries in all three caches and confirm they are
    # served back by the cached lookups.
    hunspell._suggest_cache['made-up'] = test_suggest
    assert hunspell.suggest('made-up') == test_suggest
    hunspell._suffix_cache['made-up'] = test_suffix
    assert hunspell.suffix_suggest('made-up') == test_suffix
    hunspell._stem_cache['made-up'] = test_stem
    assert hunspell.stem('made-up') == test_stem

    # Clear without saving: nothing should persist anywhere.
    hunspell.clear_cache()

    del hunspell
    hunspell = Hunspell('test', hunspell_data_dir=DICT_DIR)
    assert hunspell.suggest('made-up') != test_suggest
    assert hunspell.suffix_suggest('made-up') != test_suffix
    assert hunspell.stem('made-up') != test_stem
예제 #9
0
def pluralize_advanced(
        singular: str,
        speller: Hunspell = None,
        ending_overrides: NounEndingMap = None) -> AdvancedPluralizationResult:
    """Pluralize *singular* and validate the result against a Hunspell
    dictionary, trying several fallback searches when the naive plural is
    not a dictionary word."""
    if not speller:
        speller = ensure_hunspell_nl()

    plural = __pluralize(singular, ending_overrides)

    # No plural could be produced at all: report failure immediately.
    if not plural:
        return AdvancedPluralizationResult(plural, None, (), None, None, False)

    # The naive plural is already correctly spelled: done.
    if speller.spell(plural):
        return AdvancedPluralizationResult(plural, plural, (), None, None,
                                           True)

    # Otherwise fall back on Hunspell suggestions, ending replacement and
    # dictionary lookups, in order, stopping at the first hit.
    suggestions = speller.suggest(plural)
    search_result: SearchResult = None
    for lookup in (
            lambda: search_by_suggestions(plural, suggestions),
            lambda: search_by_dictionary(speller, plural),
            lambda: search_by_dictionary_plus_s(speller, singular)):
        search_result = lookup()
        if search_result:
            break

    if search_result:
        return AdvancedPluralizationResult(
            plural,
            search_result.plural,
            suggestions,
            search_result.switched_ending_from,
            search_result.switched_ending_to,
            True)

    return AdvancedPluralizationResult(plural, None, (), None, None, False)
예제 #10
0
class Stem:
    """

    The Stem module deals with various tasks, mainly through the following functions:
        - `check_spelling`: spell error detection
        - `correct_spelling`: spell error correction
        - `analyze`: morphological analysis

    Please note that only Sorani is supported in this version in this module. The module is based on the [Kurdish Hunspell project](https://github.com/sinaahmadi/KurdishHunspell).

    Example:
    ```python
    >>> from klpt.stem import Stem
    >>> stemmer = Stem("Sorani", "Arabic")
    >>> stemmer.check_spelling("سوتاندبووت")
    False
    >>> stemmer.correct_spelling("سوتاندبووت")
    (False, ['ستاندبووت', 'سووتاندبووت', 'سووڕاندبووت', 'ڕووتاندبووت', 'فەوتاندبووت', 'بووژاندبووت'])
    >>> stemmer.analyze("دیتبامن")
    [{'pos': 'verb', 'description': 'past_stem_transitive_active', 'base': 'دیت', 'terminal_suffix': 'بامن'}]
    ```

    """

    def __init__(self, dialect, script):
        """Initialize the stemmer for a dialect/script pair.

        Only "Sorani"/"Arabic" loads a Hunspell dictionary; "Kurmanji"/"Latin"
        is accepted (analysis goes through Apertium in `analyze`); anything
        else raises.
        """
        self.dialect = dialect
        self.script = script 

        # Mapping from Hunspell morphological flags to output dictionary keys.
        self.hunspell_flags = {"po": "pos", "is": "description", "ts": "terminal_suffix", "ds": "formation"}
        if self.dialect == "Sorani" and self.script == "Arabic":
            self.huns = Hunspell("ckb-Arab", hunspell_data_dir=klpt.get_data("data/"))
        else:
            if not (self.dialect == "Kurmanji" and self.script == "Latin"):
                raise Exception("Sorry, only Sorani dialect in the Arabic script is supported now. Stay tuned for other dialects and scripts!")

    # def stem(self, word):
    #     """A function for stemming a single word"""
    #     pass

    # def lemmatize(self, word):
    #     """A function for lemmatization of a single word"""
    #     pass

    def check_spelling(self, word):
        """Check spelling of a word

        Args:
            word (str): input word to be spell-checked

        Raises:
            TypeError: only string as input; also raised when the configured
                dialect/script pair is not Sorani/Arabic

        Returns:
            bool: True if the spelling is correct, False if the spelling is incorrect
        """
        # NOTE(review): a non-string input and an unsupported dialect both
        # raise the same TypeError here.
        if not isinstance(word, str) or not (self.dialect == "Sorani" and self.script == "Arabic"):
            raise TypeError("Not supported yet.")
        else:
            return self.huns.spell(word)

    def correct_spelling(self, word):
        """
        Correct spelling errors if the input word is incorrect. It returns a tuple where the first element indicates the correctness of the word (True if correct, False if incorrect).
            If the input word is correct, (True, []) is returned.
            If the input word is incorrect, suggestions are provided in a list as the second element of the tuple, as (False, [...]).
            If no suggestion is available, the list is empty, as (False, []).

        Args:
            word (str): input word to be spell-checked

        Raises:
            TypeError: only string as input

        Returns:
            tuple (boolean, list)

        """
        if not isinstance(word, str) or not (self.dialect == "Sorani" and self.script == "Arabic"):
            raise TypeError("Not supported yet.")
        else:
            if self.check_spelling(word):
                return (True, [])
            return (False, list(self.huns.suggest(word)))

    def analyze(self, word_form):
        """
        Morphological analysis of a given word.
        
        It returns morphological analyses. The morphological analysis is returned as a dictionary as follows:
        
        - "pos": the part-of-speech of the word-form according to [the Universal Dependency tag set](https://universaldependencies.org/u/pos/index.html). 
        - "description": is flag
        - "terminal_suffix": anything except ts flag
        - "formation": if ds flag is set, its value is assigned to description and the value of formation is set to derivational. Although the majority of our morphological rules cover inflectional forms, it is not accurate to say all of them are inflectional. Therefore, we only set this value to derivational wherever we are sure.
        - "base": `ts` flag. The definition of terminal suffix is a bit tricky in Hunspell. According to [the Hunspell documentation](http://manpages.ubuntu.com/manpages/trusty/en/man4/hunspell.4.html), "Terminal suffix fields are inflectional suffix fields "removed" by additional (not terminal) suffixes". In other words, the ts flag in Hunspell represents whatever is left after stripping all affixes. Therefore, it is the morphological base.

        As in [{'pos': 'verb', 'description': 'past_stem_transitive_active', 'base': 'دیت', 'terminal_suffix': 'بامن'}]
        If the input cannot be analyzed morphologically, an empty list is returned.

        Sorani: 
        More details regarding Sorani Kurdish morphological analysis can be found at [https://github.com/sinaahmadi/KurdishHunspell](https://github.com/sinaahmadi/KurdishHunspell).

        Kurmanji:
        Regarding Kurmanji, we use the morphological analyzer provided by the [Kurmanji part](https://github.com/apertium/apertium-kmr)

        Please note that there are delicate differences in how the analyzers work in Hunspell and Apertium. For instance, the `base` in the Kurmanji analysis refers to the lemma while in Sorani (from Hunspell), it refers to the morphological base.

        Args:
            word_form (str): a single word-form

        Raises:
            TypeError: only string as input

        Returns:
            (list(dict)): a list of all possible morphological analyses according to the defined morphological rules
            
        """
        if not isinstance(word_form, str):
            raise TypeError("Only a word (str) is allowed.")
        else:
            word_analysis = list()
            if self.dialect == "Sorani" and self.script == "Arabic":
                # Given the morphological analysis of a word-form with Hunspell flags, extract relevant information and return a dictionary
                for analysis in list(self.huns.analyze(word_form)):
                    analysis_dict = dict()
                    for item in analysis.split():
                        # Skip tokens that are not key:value flag pairs.
                        if ":" not in item:
                            continue
                        if item.split(":")[1] == "ts":
                            # ts flag exceptionally appears after the value as value:key in the Hunspell output
                            analysis_dict["base"] = item.split(":")[0]
                            # anything except the terminal_suffix is considered to be the base
                            analysis_dict[self.hunspell_flags[item.split(":")[1]]] = word_form.replace(item.split(":")[0], "")
                        elif item.split(":")[0] in self.hunspell_flags.keys():
                            # assign the key:value pairs from the Hunspell string output to the dictionary output of the current function
                            # for ds flag, add derivation as the formation type, otherwise inflection
                            if item.split(":")[0] == "ds":
                                analysis_dict[self.hunspell_flags[item.split(":")[0]]] = "derivational"
                                analysis_dict[self.hunspell_flags["is"]] = item.split(":")[1]
                            else:
                                analysis_dict[self.hunspell_flags[item.split(":")[0]]] = item.split(":")[1]

                    # if there is no value assigned to the ts flag, the terminal suffix is a zero-morpheme 0
                    if self.hunspell_flags["ts"] not in analysis_dict or analysis_dict[self.hunspell_flags["ts"]] == "":
                        analysis_dict[self.hunspell_flags["ts"]] = "0"

                    word_analysis.append(analysis_dict)

            elif self.dialect == "Kurmanji" and self.script == "Latin":
                att_analysis = Analysis("Kurmanji", "Latin").analyze(word_form)
                # check if the word-form is analyzed or no
                if not len(att_analysis[1]):
                    # the word-form could not be analyzed
                    return []

                for form_analysis in list(att_analysis[-1]):
                    for analysis in form_analysis:
                        analysis_dict = dict()
                        # Split "…@base<tag><tag>" into the lemma and its tag string.
                        structure = analysis[0].rsplit('@', 1)[1].split("<", 1)
                        analysis_dict["base"], analysis_dict["description"] = structure[0], structure[1].replace("><", "_").replace(">", "").strip()
                        analysis_dict["pos"] = ""
                        analysis_dict["terminal_suffix"] = ""
                        analysis_dict["formation"] = ""
                        # TODO: the description needs further information extraction in such a way that some values should be assigned to the "pos" key 
                        # analysis_dict["terminal_suffix"] = word_form.replace(analysis_dict["base"], "")
                        word_analysis.append(analysis_dict)

        return word_analysis
예제 #11
0
class Application:
    """Tkinter GUI that converts sign-language gestures from a webcam into
    text, using a stack of Keras CNN models for character recognition and
    Hunspell for word suggestions."""

    def __init__(self):
        """Load the models, open the webcam, build the GUI and start the
        capture loop."""
        self.hs = Hunspell('en_US')
        self.vs = cv2.VideoCapture(0)
        self.current_image = None
        self.current_image2 = None
        # NOTE(review): backslash path separators make these paths
        # Windows-specific — confirm the target platform.
        self.json_file = open("Models\model_new.json", "r")
        self.model_json = self.json_file.read()
        self.json_file.close()

        # Main classifier covering 'blank' plus A-Z.
        self.loaded_model = model_from_json(self.model_json)
        self.loaded_model.load_weights("Models\model_new.h5")

        # Disambiguation model for the visually similar D/R/U signs.
        self.json_file_dru = open("Models\model-bw_dru.json", "r")
        self.model_json_dru = self.json_file_dru.read()
        self.json_file_dru.close()

        self.loaded_model_dru = model_from_json(self.model_json_dru)
        self.loaded_model_dru.load_weights("Models\model-bw_dru.h5")
        # Disambiguation model for T/K/D/I.
        self.json_file_tkdi = open("Models\model-bw_tkdi.json", "r")
        self.model_json_tkdi = self.json_file_tkdi.read()
        self.json_file_tkdi.close()

        self.loaded_model_tkdi = model_from_json(self.model_json_tkdi)
        self.loaded_model_tkdi.load_weights("Models\model-bw_tkdi.h5")
        # Disambiguation model for S/M/N.
        self.json_file_smn = open("Models\model-bw_smn.json", "r")
        self.model_json_smn = self.json_file_smn.read()
        self.json_file_smn.close()

        self.loaded_model_smn = model_from_json(self.model_json_smn)
        self.loaded_model_smn.load_weights("Models\model-bw_smn.h5")

        # Per-symbol frame counters used to debounce predictions.
        self.ct = {}
        self.ct['blank'] = 0
        self.blank_flag = 0

        for i in ascii_uppercase:
            self.ct[i] = 0

        print("Loaded model from disk")

        # --- GUI layout -------------------------------------------------
        self.root = tk.Tk()
        self.root.title("Sign Language To Text Conversion")
        self.root.protocol('WM_DELETE_WINDOW', self.destructor)
        self.root.geometry("900x900")

        self.panel = tk.Label(self.root)
        self.panel.place(x=100, y=10, width=580, height=580)

        self.panel2 = tk.Label(self.root)  # initialize image panel
        self.panel2.place(x=400, y=65, width=275, height=275)

        self.T = tk.Label(self.root)
        self.T.place(x=60, y=5)
        self.T.config(text="Sign Language To Text Conversion",
                      font=("Courier", 30, "bold"))

        self.panel3 = tk.Label(self.root)  # Current Symbol
        self.panel3.place(x=500, y=540)

        self.T1 = tk.Label(self.root)
        self.T1.place(x=10, y=540)
        self.T1.config(text="Character :", font=("Courier", 30, "bold"))

        self.panel4 = tk.Label(self.root)  # Word
        self.panel4.place(x=220, y=595)

        self.T2 = tk.Label(self.root)
        self.T2.place(x=10, y=595)
        self.T2.config(text="Word :", font=("Courier", 30, "bold"))

        self.panel5 = tk.Label(self.root)  # Sentence
        self.panel5.place(x=350, y=645)

        self.T3 = tk.Label(self.root)
        self.T3.place(x=10, y=645)
        self.T3.config(text="Sentence :", font=("Courier", 30, "bold"))

        self.T4 = tk.Label(self.root)
        self.T4.place(x=250, y=690)
        self.T4.config(text="Suggestions :",
                       fg="red",
                       font=("Courier", 30, "bold"))

        # Buttons showing up to three Hunspell suggestions for the word.
        self.bt1 = tk.Button(self.root,
                             command=self.action1,
                             height=0,
                             width=0)
        self.bt1.place(x=26, y=745)

        self.bt2 = tk.Button(self.root,
                             command=self.action2,
                             height=0,
                             width=0)
        self.bt2.place(x=325, y=745)

        self.bt3 = tk.Button(self.root,
                             command=self.action3,
                             height=0,
                             width=0)
        self.bt3.place(x=625, y=745)

        # Accumulated sentence, current word, and last recognized symbol.
        self.str = ""
        self.word = " "
        self.current_symbol = "Empty"
        self.photo = "Empty"
        self.video_loop()

    def video_loop(self):
        """Grab a frame, run the classifier on the ROI, refresh the GUI and
        reschedule itself (runs every ~5 ms via Tk's after())."""
        ok, frame = self.vs.read()

        if ok:
            cv2image = cv2.flip(frame, 1)

            # Region of interest: top-right quadrant of the frame.
            x1 = int(0.5 * frame.shape[1])
            y1 = 10
            x2 = frame.shape[1] - 10
            y2 = int(0.5 * frame.shape[1])

            cv2.rectangle(frame, (x1 - 1, y1 - 1), (x2 + 1, y2 + 1),
                          (255, 0, 0), 1)
            cv2image = cv2.cvtColor(cv2image, cv2.COLOR_BGR2RGBA)

            self.current_image = Image.fromarray(cv2image)
            imgtk = ImageTk.PhotoImage(image=self.current_image)

            self.panel.imgtk = imgtk
            self.panel.config(image=imgtk)

            cv2image = cv2image[y1:y2, x1:x2]

            # Binarize the ROI: blur + adaptive threshold + Otsu.
            gray = cv2.cvtColor(cv2image, cv2.COLOR_BGR2GRAY)

            blur = cv2.GaussianBlur(gray, (5, 5), 2)

            th3 = cv2.adaptiveThreshold(blur, 255,
                                        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY_INV, 11, 2)

            ret, res = cv2.threshold(th3, 70, 255,
                                     cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

            self.predict(res)

            self.current_image2 = Image.fromarray(res)

            imgtk = ImageTk.PhotoImage(image=self.current_image2)

            self.panel2.imgtk = imgtk
            self.panel2.config(image=imgtk)

            self.panel3.config(text=self.current_symbol, font=("Courier", 30))

            self.panel4.config(text=self.word, font=("Courier", 30))

            self.panel5.config(text=self.str, font=("Courier", 30))

            # Show up to three spelling suggestions on the buttons.
            # NOTE(review): thresholds are len > 1/2/3 while indices are
            # 0/1/2, so the first suggestion is hidden when it is the only
            # one — confirm whether this off-by-one is intended.
            predicts = self.hs.suggest(self.word)

            if (len(predicts) > 1):

                self.bt1.config(text=predicts[0], font=("Courier", 20))

            else:

                self.bt1.config(text="")

            if (len(predicts) > 2):

                self.bt2.config(text=predicts[1], font=("Courier", 20))

            else:

                self.bt2.config(text="")

            if (len(predicts) > 3):

                self.bt3.config(text=predicts[2], font=("Courier", 20))

            else:

                self.bt3.config(text="")

        self.root.after(5, self.video_loop)

    def predict(self, test_image):
        """Classify a binarized 128x128 ROI and update current_symbol, the
        per-symbol debounce counters, the current word and the sentence."""
        test_image = cv2.resize(test_image, (128, 128))

        # LAYER 1: main classifier over 'blank' + A-Z.
        result = self.loaded_model.predict(test_image.reshape(1, 128, 128, 1))

        result_dru = self.loaded_model_dru.predict(
            test_image.reshape(1, 128, 128, 1))

        result_tkdi = self.loaded_model_tkdi.predict(
            test_image.reshape(1, 128, 128, 1))

        result_smn = self.loaded_model_smn.predict(
            test_image.reshape(1, 128, 128, 1))

        prediction = {}

        prediction['blank'] = result[0][0]

        inde = 1

        for i in ascii_uppercase:

            prediction[i] = result[0][inde]

            inde += 1

        #LAYER 1

        prediction = sorted(prediction.items(),
                            key=operator.itemgetter(1),
                            reverse=True)

        self.current_symbol = prediction[0][0]

        #LAYER 2
        # Refine ambiguous letter groups with the specialized models.

        if (self.current_symbol == 'D' or self.current_symbol == 'R'
                or self.current_symbol == 'U'):

            prediction = {}

            prediction['D'] = result_dru[0][0]
            prediction['R'] = result_dru[0][1]
            prediction['U'] = result_dru[0][2]

            prediction = sorted(prediction.items(),
                                key=operator.itemgetter(1),
                                reverse=True)

            self.current_symbol = prediction[0][0]

        if (self.current_symbol == 'D' or self.current_symbol == 'I'
                or self.current_symbol == 'K' or self.current_symbol == 'T'):

            prediction = {}

            prediction['D'] = result_tkdi[0][0]
            prediction['I'] = result_tkdi[0][1]
            prediction['K'] = result_tkdi[0][2]
            prediction['T'] = result_tkdi[0][3]

            prediction = sorted(prediction.items(),
                                key=operator.itemgetter(1),
                                reverse=True)

            self.current_symbol = prediction[0][0]

        if (self.current_symbol == 'M' or self.current_symbol == 'N'
                or self.current_symbol == 'S'):

            prediction1 = {}

            prediction1['M'] = result_smn[0][0]
            prediction1['N'] = result_smn[0][1]
            prediction1['S'] = result_smn[0][2]

            prediction1 = sorted(prediction1.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)

            if (prediction1[0][0] == 'S'):

                self.current_symbol = prediction1[0][0]

            else:

                self.current_symbol = prediction[0][0]

        # Debounce: a symbol is committed only after dominating for >60
        # frames with a clear margin over every other symbol.
        if (self.current_symbol == 'blank'):

            for i in ascii_uppercase:
                self.ct[i] = 0

        self.ct[self.current_symbol] += 1

        if (self.ct[self.current_symbol] > 60):

            for i in ascii_uppercase:
                if i == self.current_symbol:
                    continue

                tmp = self.ct[self.current_symbol] - self.ct[i]

                if tmp < 0:
                    tmp *= -1

                # Another symbol is too close behind — ambiguous, reset.
                if tmp <= 20:
                    self.ct['blank'] = 0

                    for i in ascii_uppercase:
                        self.ct[i] = 0
                    return

            self.ct['blank'] = 0

            for i in ascii_uppercase:
                self.ct[i] = 0

            if self.current_symbol == 'blank':

                # A held 'blank' terminates the current word.
                if self.blank_flag == 0:
                    self.blank_flag = 1

                    if len(self.str) > 0:
                        self.str += " "

                    self.str += self.word

                    self.word = ""

            else:

                if (len(self.str) > 16):
                    self.str = ""

                self.blank_flag = 0

                self.word += self.current_symbol

    def action1(self):
        """Replace the current word with the first Hunspell suggestion."""
        predicts = self.hs.suggest(self.word)

        if (len(predicts) > 0):

            self.word = ""

            self.str += " "

            self.str += predicts[0]

    def action2(self):
        """Replace the current word with the second Hunspell suggestion."""
        predicts = self.hs.suggest(self.word)

        if (len(predicts) > 1):
            self.word = ""
            self.str += " "
            self.str += predicts[1]

    def action3(self):
        """Replace the current word with the third Hunspell suggestion."""
        predicts = self.hs.suggest(self.word)

        if (len(predicts) > 2):
            self.word = ""
            self.str += " "
            self.str += predicts[2]

    def action4(self):
        """Replace the current word with the fourth Hunspell suggestion
        (not wired to any button in this build)."""
        predicts = self.hs.suggest(self.word)

        if (len(predicts) > 3):
            self.word = ""
            self.str += " "
            self.str += predicts[3]

    def action5(self):
        """Replace the current word with the fifth Hunspell suggestion
        (not wired to any button in this build)."""
        predicts = self.hs.suggest(self.word)

        if (len(predicts) > 4):
            self.word = ""
            self.str += " "
            self.str += predicts[4]

    def destructor(self):
        """Tear down the window, release the camera and close OpenCV windows."""
        print("Closing Application...")

        self.root.destroy()
        self.vs.release()
        cv2.destroyAllWindows()
예제 #12
0
from hunspell import Hunspell

# Korean dictionary loaded from the local 'ko' directory.
h = Hunspell("ko", hunspell_data_dir='ko')

if __name__ == "__main__":
    # Spell-check the misspelled and the truncated form in turn.
    for word in ("안녕하세요으", "안녕하세"):
        print(h.spell(word))

    # Show Hunspell's suggestions for the misspelled form.
    print(h.suggest("안녕하세요으"))
예제 #13
0
class SpellChecker:
    """Hunspell-backed spell checking.

    Implemented as a class so several instances can maintain separate
    dictionaries at the same time (for example after adding custom words).
    """

    def __init__(self, allowed_punctuation_marks, dictionary_directory):
        """Store configuration and build the initial Hunspell object."""
        self.allowed_punctuation_marks = allowed_punctuation_marks
        self.dictionary_directory = dictionary_directory
        self.hunspell = None
        self.refresh_dict()

    def refresh_dict(self):
        """(Re)load the 'index' dictionary from the configured directory."""
        self.hunspell = Hunspell('index',
                                 hunspell_data_dir=self.dictionary_directory)

    def is_punctuation_mark(self, word):
        """Return True when *word* is one of the allowed punctuation marks.

        :param word: a string with a single word
        :return: boolean
        """
        pattern = r'[%s]' % self.allowed_punctuation_marks
        return bool(re.match(pattern, word))

    def is_correctly_spelled(self, word):
        """Return True when Hunspell accepts *word* as correctly spelled.

        :param word: a string with a single word
        :return: boolean
        """
        return self.hunspell.spell(word)

    def suggest(self, word):
        """Return Hunspell's suggestions for *word*, closest first.

        Suggestions are ordered by edit distance to the original word.

        :param word: a string with a single word
        :return: list of suggestion strings
        """
        candidates = self.hunspell.suggest(word)
        return sorted(candidates,
                      key=lambda candidate: edit_distance(word, candidate))

    def fix(self, word):
        """Return *word* untouched when it is a punctuation mark or already
        correct; otherwise return the closest suggestion."""
        if self.is_punctuation_mark(word) or self.is_correctly_spelled(word):
            return word
        return self.suggest(word)[0]

    def fix_text(self, text):
        """Fix the spelling of every token in *text* and re-join the phrase,
        removing the spaces that tokenization left before punctuation."""
        fixed_text = ' '.join(self.fix(word) for word in word_tokenize(text))
        return re.sub(r' ([%s])' % self.allowed_punctuation_marks, r'\1',
                      fixed_text)  # remove spaces preceding punctuation
예제 #14
0
 def test_hunspell_suggest(self):
     """suggest('dpg') must return the known en_US candidate list, in order."""
     d = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
     self.assertListEqual(
         d.suggest('dpg'),
         ['dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg', 'GDP'])
     del d
예제 #15
0
class UnsupervisedGrammarCorrector:
    """Iteratively rewrites a sentence by proposing single-token edits
    (spelling, inflection, determiner, preposition substitutions) and keeping
    any candidate that the language model scores higher than the current best.
    """

    def __init__(self, threshold=0.96):
        # NOTE(review): threshold is stored but never read in this class —
        # confirm whether it is used by callers or is dead configuration.
        basename = os.path.dirname(os.path.realpath(__file__))
        self.lm = LanguageModel()
        # Load spaCy
        self.nlp = spacy.load("en")
        # Hunspell spellchecker: https://pypi.python.org/pypi/CyHunspell
        # CyHunspell seems to be more accurate than Aspell in PyEnchant, but a bit slower.
        self.gb = Hunspell("en_GB-large",
                           hunspell_data_dir=basename + '/resources/spelling/')
        # Inflection forms: http://wordlist.aspell.net/other/
        self.gb_infl = loadWordFormDict(basename +
                                        "/resources/agid-2016.01.19/infl.txt")
        # List of common determiners
        self.determiners = {"", "the", "a", "an"}
        # List of common prepositions
        self.prepositions = {
            "", "about", "at", "by", "for", "from", "in", "of", "on", "to",
            "with"
        }
        self.threshold = threshold

    def correct(self, sentence):
        """Hill-climb over single-edit rewrites of *sentence* until no
        candidate improves the language-model score; return the best found."""
        # If the line is empty, preserve the newline in output and continue
        if not sentence:
            return ""
        best = sentence
        score = self.lm.score(best)

        while True:
            new_best, new_score = self.process(best)
            if new_best and new_score > score:
                best = new_best
                score = new_score
            else:
                break

        return best

    def process(self, sentence: str) -> Tuple[str, float]:
        """Generate all one-token edit candidates for *sentence* and return
        the highest-scoring sentence together with its language-model score.

        The original annotation claimed ``Tuple[str, bool]``; the second
        element is actually the float LM score, which the caller compares
        against the previous score."""
        # Process sent with spacy
        proc_sent = self.nlp.tokenizer(sentence)
        self.nlp.tagger(proc_sent)
        # Calculate avg token prob of the sent so far.
        orig_prob = self.lm.score(proc_sent.text)
        # Store all the candidate corrected sentences here
        candidates = []
        # Process each token.
        for tok in proc_sent:
            # SPELLCHECKING
            # Spell check: tok must be alphabetical and not a real word.

            candidate_tokens = set()

            lower_cased_token = tok.lower_

            if lower_cased_token.isalpha(
            ) and not self.gb.spell(lower_cased_token):
                candidate_tokens |= set(self.gb.suggest(lower_cased_token))
            # MORPHOLOGY
            if tok.lemma_ in self.gb_infl:
                candidate_tokens |= self.gb_infl[tok.lemma_]
            # DETERMINERS
            if lower_cased_token in self.determiners:
                candidate_tokens |= self.determiners
            # PREPOSITIONS
            if lower_cased_token in self.prepositions:
                candidate_tokens |= self.prepositions

            # Only keep substitutions that are themselves valid words.
            candidate_tokens = [
                c for c in candidate_tokens if self.gb.spell(c)
            ]

            if candidate_tokens:
                # Preserve the casing of the original token.
                if tok.is_title:
                    candidate_tokens = [c.title() for c in candidate_tokens]
                elif tok.is_upper:
                    candidate_tokens = [c.upper() for c in candidate_tokens]

                candidates.extend(
                    self._generate_candidates(tok.i, candidate_tokens,
                                              proc_sent))

        best_prob = orig_prob
        best = sentence

        for candidate in candidates:
            # Score the candidate sentence
            cand_prob = self.lm.score(candidate.text)
            # NOTE(review): debug print left in — consider removing.
            print(candidate.text, self.lm.score(candidate.text), cand_prob)

            # Compare cand_prob against weighted orig_prob and best_prob
            if cand_prob > best_prob:
                best_prob = cand_prob
                best = candidate.text
        # Return the best sentence and its score; the caller decides whether
        # to keep searching for more errors.
        return best, best_prob

    def _generate_candidates(self, tok_id, candidate_tokens,
                             tokenized_sentence) -> List:
        """Build one re-tokenized sentence per substitute token, replacing
        the token at *tok_id*. Elements are spaCy Doc objects (not strings,
        as the original ``List[str]`` annotation suggested); an empty
        substitute deletes the token."""
        # Save candidates here.
        candidates = []

        prefix = tokenized_sentence[:tok_id]
        suffix = tokenized_sentence[tok_id + 1:]
        # Loop through the input alternative candidates
        for token in candidate_tokens:
            candidate = prefix.text_with_ws
            if token:
                candidate += token + " "
            candidate += suffix.text_with_ws
            candidate = self.nlp.tokenizer(candidate)
            candidates.append(candidate)
        return candidates
예제 #16
0
 def test_hunspell_suggest(self):
     # Suggestions for 'dpg' from the en_US dictionary must match exactly.
     speller = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
     expected = ['dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg', 'GDP']
     self.assertListEqual(speller.suggest('dpg'), expected)
     del speller
예제 #17
0
class HunspellTest(unittest.TestCase):
    """Unit tests for the cyhunspell ``Hunspell`` wrapper: spell checking,
    suggestions, stemming, bulk/concurrent operations, and the in-memory and
    disk-backed suggestion/stem caches."""

    def assertRegexpSearch(self, *args, **kwargs):
        # Bridge the PY2/PY3 rename of the regex assertion helper.
        if PY3:
            self.assertRegex(*args, **kwargs)
        else:
            self.assertRegexpMatches(*args, **kwargs)

    def setUp(self):
        # Each test gets a fresh instance backed by the small 'test' dictionary.
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)

    def tearDown(self):
        # Some tests delete self.h themselves; tolerate that here.
        try:
            del self.h
        except AttributeError:
            pass

    def assertAllIn(self, checked, expected):
        # Assert every element of `checked` appears somewhere in `expected`.
        self.assertTrue(all(x in expected for x in checked),
            u"{} not all found in {}".format(checked, expected))

    def test_create_destroy(self):
        del self.h

    def test_missing_dict(self):
        # A non-existent dictionary name must raise at construction time.
        with self.assertRaises(HunspellFilePathError):
            Hunspell('not_avail', hunspell_data_dir=DICT_DIR)

    @patch('os.path.isfile', return_value=True)
    @patch('os.access', return_value=True)
    def test_bad_path_encoding(self, *mocks):
        # File-existence checks are mocked out so only path decoding is tested.
        if PY3:
            with self.assertRaises(HunspellFilePathError):
                Hunspell('not_checked',
                    hunspell_data_dir=u'bad/\udcc3/decoding')
        else:
            # Python 2 just make an illegal string instead of raising
            with captured_c_stderr_file() as caperr:
                Hunspell('not_checked',
                    hunspell_data_dir=u'bad/\udcc3/decoding')
                with open(caperr, 'r') as err:
                    self.assertRegexpSearch(err.read(), r'error:[^\n]*bad/[^\n]*/decoding')

    @patch('hunspell.hunspell.WIN32_LONG_PATH_PREFIX', '/not/valid')
    def test_windows_utf_8_encoding_applies_prefix(self, *mocks):
        with captured_c_stderr_file() as caperr:
            with patch("os.name", 'nt'):
                # If python file existance checks used prefix, this would raise a HunspellFilePathError
                Hunspell('test', system_encoding='UTF-8')
            with open(caperr, 'r') as err:
                # But the Hunspell library lookup had the prefix applied
                self.assertRegexpSearch(err.read(), r'error:[^\n]*/not/valid[^\n]*')

    def test_spell(self):
        self.assertFalse(self.h.spell('dpg'))
        self.assertTrue(self.h.spell('dog'))

    def test_spell_utf8(self):
        self.assertTrue(self.h.spell(u'café'))
        self.assertFalse(self.h.spell(u'uncafé'))

    def test_spell_empty(self):
        # The empty string is treated as correctly spelled.
        self.assertTrue(self.h.spell(''))

    def test_suggest(self):
        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        suggest = self.h.suggest('dpg')
        self.assertIsInstance(suggest, tuple)
        self.assertAllIn(required, suggest)

    def test_suggest_utf8(self):
        required = (u'café', u'Cerf')
        # Both byte-literal and unicode-literal inputs must behave the same.
        for variant in ('cefé', u'cefé'):
            suggest = self.h.suggest(variant)
            self.assertIsInstance(suggest, tuple)
            self.assertAllIn(required, suggest)

    def test_suggest_empty(self):
        self.assertEqual(self.h.suggest(''), ())

    def test_stem(self):
        self.assertEqual(self.h.stem('dog'), ('dog',))
        self.assertEqual(self.h.stem('permanently'), ('permanent',))

    def test_add(self):
        # Adding a word to the runtime dictionary affects both spell() and
        # suggestions for near-misses.
        word = 'outofvocabularyword'
        self.assertEqual(self.h.spell(word), False)
        self.h.add(word)
        self.assertEqual(self.h.spell(word), True)
        typo = word + 'd'
        self.assertAllIn([word], self.h.suggest(typo))

    def test_bulk_suggest(self):
        self.h.set_concurrency(3)
        suggest = self.h.bulk_suggest(['dog', 'dpg'])
        self.assertEqual(sorted(suggest.keys()), ['dog', 'dpg'])
        self.assertIsInstance(suggest['dog'], tuple)
        self.assertAllIn(('dog',), suggest['dog'])

        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        self.assertIsInstance(suggest['dpg'], tuple)
        self.assertAllIn(required, suggest['dpg'])

        # More inputs than worker threads: all keys must still come back.
        checked = ['bjn', 'dog', 'dpg', 'dyg', 'foo', 'frg', 'opg', 'pgg', 'qre', 'twg']
        suggest = self.h.bulk_suggest(checked)
        self.assertEqual(sorted(suggest.keys()), checked)

    def test_bulk_stem(self):
        self.h.set_concurrency(3)
        self.assertDictEqual(self.h.bulk_stem(['dog', 'permanently']), {
            'permanently': ('permanent',),
            'dog': ('dog',)
        })
        self.assertDictEqual(self.h.bulk_stem(['dog', 'twigs', 'permanently', 'unrecorded']), {
            'unrecorded': ('recorded',),
            'permanently': ('permanent',),
            'twigs': ('twig',),
            'dog': ('dog',)
        })

    def test_non_overlapping_caches(self):
        # Caches are seeded directly to prove instances with different
        # dictionaries do NOT share cache entries.
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        h2 = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
        self.assertNotEqual(h2.suggest('made-up'), test_suggest)
        self.assertNotEqual(h2.stem('made-up'), test_stem)

    def test_overlapping_caches(self):
        # Instances created with the SAME dictionary share cache entries,
        # even across destruction/re-creation.
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        del self.h
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.assertEqual(self.h.stem('made-up'), test_stem)

    def test_save_caches_persistance(self):
        # save_cache() must persist entries to disk so a brand-new instance
        # (after deregistering in-memory caches) sees the same entries.
        temp_dir = tempfile.mkdtemp()
        try:
            h1 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')
            test_suggest = h1.suggest('testing')
            test_stem = h1.stem('testing')

            h1._suggest_cache['made-up'] = test_suggest
            self.assertEqual(h1.suggest('made-up'), test_suggest)
            h1._stem_cache['made-up'] = test_stem
            self.assertEqual(h1.stem('made-up'), test_stem)

            h1.save_cache()
            del h1

            cacheman = get_cache_manager('disk_hun')
            cacheman.deregister_all_caches()
            self.assertEqual(len(cacheman.cache_by_name), 0)

            h2 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')

            self.assertNotEqual(len(h2._suggest_cache), 0)
            self.assertNotEqual(len(h2._stem_cache), 0)
            self.assertEqual(h2.suggest('made-up'), test_suggest)
            self.assertEqual(h2.stem('made-up'), test_stem)
        finally:
            shutil.rmtree(temp_dir) # Nuke temp content

    def test_clear_caches_persistance(self):
        # clear_cache() after save_cache() must wipe the persisted entries,
        # so a new instance starts with empty caches.
        temp_dir = tempfile.mkdtemp()
        try:
            h1 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')
            test_suggest = h1.suggest('testing')
            test_stem = h1.stem('testing')

            h1._suggest_cache['made-up'] = test_suggest
            self.assertEqual(h1.suggest('made-up'), test_suggest)
            h1._stem_cache['made-up'] = test_stem
            self.assertEqual(h1.stem('made-up'), test_stem)

            h1.save_cache()
            h1.clear_cache()
            del h1

            cacheman = get_cache_manager('disk_hun')
            cacheman.deregister_all_caches()
            self.assertEqual(len(cacheman.cache_by_name), 0)

            h2 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')

            self.assertEqual(len(h2._suggest_cache), 0)
            self.assertEqual(len(h2._stem_cache), 0)
            self.assertNotEqual(h2.suggest('made-up'), test_suggest)
            self.assertNotEqual(h2.stem('made-up'), test_stem)
        finally:
            shutil.rmtree(temp_dir) # Nuke temp content

    def test_clear_caches_non_peristance(self):
        # Without a disk cache manager, clear_cache() leaves nothing behind
        # for a replacement instance to pick up.
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        self.h.clear_cache()

        del self.h
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)
        self.assertNotEqual(self.h.suggest('made-up'), test_suggest)
        self.assertNotEqual(self.h.stem('made-up'), test_stem)
예제 #18
0
class Application:
    """Tkinter GUI converting sign-language hand gestures from the webcam
    into text: a CNN classifies each frame's hand region into a letter
    (with extra disambiguation models for commonly confused letter groups),
    letters accumulate into words/sentences, and Hunspell supplies word
    suggestions bound to on-screen buttons."""

    def __init__(self):
        # Runtime resources.
        self.directory = "model/"
        self.hs = Hunspell('en_US')    # spell checker for word suggestions
        self.vs = cv2.VideoCapture(0)  # webcam capture handle
        self.current_image = None
        self.current_image2 = None

        # Main classifier over 'blank' + A-Z (architecture from JSON,
        # weights from HDF5).
        self.json_file = open(self.directory + "model.json", "r")
        self.model_json = self.json_file.read()
        self.json_file.close()
        self.loaded_model = model_from_json(self.model_json)
        self.loaded_model.load_weights(self.directory + "model.h5")

        # Disambiguation model for the confusable set {D, R, U}.
        self.json_file_dru = open(self.directory + "model_dru.json", "r")
        self.model_json_dru = self.json_file_dru.read()
        self.json_file_dru.close()
        self.loaded_model_dru = model_from_json(self.model_json_dru)
        self.loaded_model_dru.load_weights(self.directory + "model_dru.h5")

        # Disambiguation model for the confusable set {T, K, D, I}.
        self.json_file_tkdi = open(self.directory + "model_tkdi.json", "r")
        self.model_json_tkdi = self.json_file_tkdi.read()
        self.json_file_tkdi.close()
        self.loaded_model_tkdi = model_from_json(self.model_json_tkdi)
        self.loaded_model_tkdi.load_weights(self.directory + "model_tkdi.h5")

        # Disambiguation model for the confusable set {S, M, N}.
        self.json_file_smn = open(self.directory + "model_smn.json", "r")
        self.model_json_smn = self.json_file_smn.read()
        self.json_file_smn.close()
        self.loaded_model_smn = model_from_json(self.model_json_smn)
        self.loaded_model_smn.load_weights(self.directory + "model_smn.h5")

        # Per-symbol frame counters used to debounce predictions; a symbol
        # is only committed after it dominates for several frames.
        self.ct = {}
        self.ct['blank'] = 0
        self.blank_flag = 0
        for i in ascii_uppercase:
            self.ct[i] = 0
        print("Loaded model from disk")

        # --- GUI layout ---
        self.root = tk.Tk()
        self.root.title("Sign language to Text Converter")
        self.root.protocol('WM_DELETE_WINDOW', self.destructor)
        self.root.geometry("1100x1100")

        self.canvas = tk.Canvas(width=1100, height=1100)
        self.canvas.pack(fill="both", expand=True)

        self.panel = tk.Label(self.root)
        self.panel.place(x=135, y=90, width=640, height=480)

        self.panel2 = tk.Label(self.root)  # initialize image panel
        self.panel2.place(x=460, y=95, width=310, height=310)

        self.canvas.create_text(450,
                                50,
                                text="Sign Language to Text",
                                fill="black",
                                font=("courier", 30, "bold"))

        self.panel3 = tk.Label(self.root)  # Current Symbol
        self.panel3.place(x=500, y=600)
        self.canvas.create_text(155,
                                653,
                                text="Character:",
                                fill="black",
                                font=("courier", 30, "bold"))

        self.panel4 = tk.Label(self.root)  # Word
        self.panel4.place(x=220, y=680)
        self.canvas.create_text(110,
                                713,
                                text="Word:",
                                fill="black",
                                font=("courier", 30, "bold"))

        self.panel5 = tk.Label(self.root)  # Sentence
        self.panel5.place(x=350, y=740)
        self.canvas.create_text(140,
                                773,
                                text="Sentence:",
                                fill="black",
                                font=("courier", 30, "bold"))

        self.T4 = tk.Label(self.root)
        self.T4.place(x=270, y=800)
        self.T4.config(text="Suggestions",
                       fg="red",
                       font=("Courier", 20, "bold"))

        self.btcall = tk.Button(self.root,
                                command=self.action_call,
                                height=0,
                                width=0)
        self.btcall.config(text="About",
                           bg="black",
                           fg="white",
                           font=("Courier", 14))
        self.btcall.place(x=950, y=20)

        # bt1-bt5 display the top five Hunspell suggestions; clicking one
        # replaces the current word with that suggestion.
        self.bt1 = tk.Button(self.root,
                             bg="#DAF7A6",
                             activebackground='white',
                             command=self.action1,
                             height=0,
                             width=0)
        self.bt1.place(x=25, y=890)

        self.bt2 = tk.Button(self.root,
                             bg="#DAF7A6",
                             activebackground='white',
                             command=self.action2,
                             height=0,
                             width=0)
        self.bt2.place(x=325, y=890)

        self.bt3 = tk.Button(self.root,
                             bg="#DAF7A6",
                             activebackground='white',
                             command=self.action3,
                             height=0,
                             width=0)
        self.bt3.place(x=625, y=890)

        self.bt4 = tk.Button(self.root,
                             bg="#DAF7A6",
                             activebackground='white',
                             command=self.action4,
                             height=0,
                             width=0)
        self.bt4.place(x=25, y=950)

        self.bt5 = tk.Button(self.root,
                             bg="#DAF7A6",
                             activebackground='white',
                             command=self.action5,
                             height=0,
                             width=0)
        self.bt5.place(x=325, y=950)

        # Audio / Backspace / Reset: their commands are bound later inside
        # predict(), once a sentence exists.
        self.bt6 = tk.Button(self.root,
                             text="Audio",
                             bg="#DAF7A6",
                             activebackground='white',
                             font=("Courier", 20))
        self.bt6.place(x=930, y=80)

        self.bt7 = tk.Button(self.root,
                             text="Backspace",
                             bg="#DAF7A6",
                             activebackground='white',
                             font=("Courier", 20))
        self.bt7.place(x=880, y=140)

        self.bt8 = tk.Button(self.root,
                             text="Reset",
                             bg="#DAF7A6",
                             activebackground='white',
                             font=("Courier", 20))
        self.bt8.place(x=930, y=200)

        # Accumulated sentence, current word, and last predicted symbol.
        self.str = ""
        self.word = ""
        self.current_symbol = "Empty"
        self.photo = "Empty"
        self.video_loop()

    def video_loop(self):
        """Grab one webcam frame, extract and threshold the hand region,
        run prediction, refresh all panels and the five suggestion buttons,
        then reschedule itself via Tk's event loop (~30 ms)."""
        ok, frame = self.vs.read()
        if ok:
            cv2image = cv2.flip(frame, 1)
            # Region of interest: top-right quadrant-ish box of the frame.
            x1 = int(0.5 * frame.shape[1])
            y1 = 10
            x2 = frame.shape[1] - 10
            y2 = int(0.5 * frame.shape[1])
            cv2.rectangle(frame, (x1 - 1, y1 - 1), (x2 + 1, y2 + 1),
                          (255, 0, 0), 1)
            cv2image = cv2.cvtColor(cv2image, cv2.COLOR_BGR2RGBA)
            self.current_image = Image.fromarray(cv2image)
            imgtk = ImageTk.PhotoImage(image=self.current_image)
            self.panel.imgtk = imgtk
            self.panel.config(image=imgtk)
            # Crop the ROI, then grayscale -> blur -> adaptive + Otsu
            # thresholding to get a binary hand mask for the classifier.
            cv2image = cv2image[y1:y2, x1:x2]
            gray = cv2.cvtColor(cv2image, cv2.COLOR_BGR2GRAY)
            blur = cv2.GaussianBlur(gray, (5, 5), 2)
            th3 = cv2.adaptiveThreshold(blur, 255,
                                        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY_INV, 11, 2)
            ret, res = cv2.threshold(th3, 70, 255,
                                     cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
            self.predict(res)
            self.current_image2 = Image.fromarray(res)
            imgtk = ImageTk.PhotoImage(image=self.current_image2)
            self.panel2.imgtk = imgtk
            self.panel2.config(image=imgtk)
            self.panel3.config(text=self.current_symbol, font=("Courier", 35))
            self.panel4.config(text=self.word, font=("Courier", 25))
            self.panel5.config(text=self.str, font=("Courier", 25))
            # Show up to five spelling suggestions for the word in progress.
            predicts = self.hs.suggest(self.word)
            if (len(predicts) > 0):
                self.bt1.config(text=predicts[0], font=("Courier", 20))
            else:
                self.bt1.config(text="")
            if (len(predicts) > 1):
                self.bt2.config(text=predicts[1], font=("Courier", 20))
            else:
                self.bt2.config(text="")
            if (len(predicts) > 2):
                self.bt3.config(text=predicts[2], font=("Courier", 20))
            else:
                self.bt3.config(text="")
            if (len(predicts) > 3):
                self.bt4.config(text=predicts[3], font=("Courier", 20))
            else:
                self.bt4.config(text="")
            if (len(predicts) > 4):
                self.bt5.config(text=predicts[4], font=("Courier", 20))
            else:
                self.bt5.config(text="")
        self.root.after(30, self.video_loop)

    def predict(self, test_image):
        """Classify a 128x128 binary hand image into a symbol, refine it
        with the group-specific disambiguation models, and commit it to the
        current word once it has dominated for enough consecutive frames.
        A committed 'blank' flushes the word into the sentence."""
        test_image = cv2.resize(test_image, (128, 128))
        result = self.loaded_model.predict(test_image.reshape(1, 128, 128, 1))
        result_dru = self.loaded_model_dru.predict(
            test_image.reshape(1, 128, 128, 1))
        result_tkdi = self.loaded_model_tkdi.predict(
            test_image.reshape(1, 128, 128, 1))
        result_smn = self.loaded_model_smn.predict(
            test_image.reshape(1, 128, 128, 1))
        prediction = {}
        prediction['blank'] = result[0][0]
        inde = 1
        for i in ascii_uppercase:
            prediction[i] = result[0][inde]
            inde += 1

        #LAYER 1
        prediction = sorted(prediction.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
        self.current_symbol = prediction[0][0]

        #LAYER 2
        # Re-decide within confusable groups using the specialist models.
        if (self.current_symbol == 'D' or self.current_symbol == 'R'
                or self.current_symbol == 'U'):
            prediction = {}
            prediction['D'] = result_dru[0][0]
            prediction['R'] = result_dru[0][1]
            prediction['U'] = result_dru[0][2]
            prediction = sorted(prediction.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
            self.current_symbol = prediction[0][0]

        if (self.current_symbol == 'D' or self.current_symbol == 'I'
                or self.current_symbol == 'K' or self.current_symbol == 'T'):
            prediction = {}
            prediction['D'] = result_tkdi[0][0]
            prediction['I'] = result_tkdi[0][1]
            prediction['K'] = result_tkdi[0][2]
            prediction['T'] = result_tkdi[0][3]
            prediction = sorted(prediction.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
            self.current_symbol = prediction[0][0]

        if (self.current_symbol == 'M' or self.current_symbol == 'N'
                or self.current_symbol == 'S'):
            prediction1 = {}
            prediction1['M'] = result_smn[0][0]
            prediction1['N'] = result_smn[0][1]
            prediction1['S'] = result_smn[0][2]
            prediction1 = sorted(prediction1.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)
            if (prediction1[0][0] == 'S'):
                self.current_symbol = prediction1[0][0]
            else:
                self.current_symbol = prediction[0][0]

        # Debounce: reset letter counters on blank, count the current symbol.
        if (self.current_symbol == 'blank'):
            for i in ascii_uppercase:
                self.ct[i] = 0
        self.ct[self.current_symbol] += 1

        if (self.ct[self.current_symbol] > 15):  # 60
            # Reject if another symbol's count is too close (ambiguous).
            for i in ascii_uppercase:
                if i == self.current_symbol:
                    print(i)
                    continue
                tmp = self.ct[self.current_symbol] - self.ct[i]
                if tmp < 0:
                    tmp *= -1
                if tmp <= 5:  # 20
                    self.ct['blank'] = 0
                    for i in ascii_uppercase:
                        self.ct[i] = 0
                    return
            self.ct['blank'] = 0
            for i in ascii_uppercase:
                self.ct[i] = 0
            if self.current_symbol == 'blank':
                if self.blank_flag == 0:
                    self.blank_flag = 1
                    if len(self.str) > 0:
                        self.str += " "
                    self.str += self.word
                    self.word = ""
                    print(self.str)

                    def Text_to_speech():  # for audio output
                        if os.path.exists("audio.mp3"):
                            os.remove("audio.mp3")
                        Message = self.str
                        speech = gTTS(text=Message)
                        speech.save('audio.mp3')
                        playsound('audio.mp3')

                    def erase():  # for reset
                        self.str = ""

                    def Back_Space():  # for correction
                        # NOTE(review): rstrip strips ALL trailing copies of
                        # the last character, not just one; self.str[:-1]
                        # is likely the intended behavior — confirm.
                        self.str = self.str.rstrip(self.str[-1])

                    self.bt6.config(command=Text_to_speech)
                    self.bt7.config(command=Back_Space)
                    self.bt8.config(command=erase)
            else:
                if (len(self.str) > 16):
                    self.str = ""
                self.blank_flag = 0
                self.word += self.current_symbol
                print(self.str)

    # action1-action5: accept the Nth Hunspell suggestion for the current
    # word and append it to the sentence.
    def action1(self):
        predicts = self.hs.suggest(self.word)
        if (len(predicts) > 0):
            self.word = ""
            self.str += " "
            self.str += predicts[0]

    def action2(self):
        predicts = self.hs.suggest(self.word)
        if (len(predicts) > 1):
            self.word = ""
            self.str += " "
            self.str += predicts[1]

    def action3(self):
        predicts = self.hs.suggest(self.word)
        if (len(predicts) > 2):
            self.word = ""
            self.str += " "
            self.str += predicts[2]

    def action4(self):
        predicts = self.hs.suggest(self.word)
        if (len(predicts) > 3):
            self.word = ""
            self.str += " "
            self.str += predicts[3]

    def action5(self):
        predicts = self.hs.suggest(self.word)
        if (len(predicts) > 4):
            self.word = ""
            self.str += " "
            self.str += predicts[4]

    def destructor(self):
        """Release the camera and tear down the main window."""
        print("Closing Application...")
        self.root.destroy()
        self.vs.release()
        cv2.destroyAllWindows()

    def destructor1(self):
        """Close only the 'About' pop-up window."""
        print("Closing Application...")
        self.root1.destroy()

    def action_call(self):
        """Open the 'About' pop-up showing team photos and credits."""
        self.root1 = tk.Toplevel(self.root)
        self.root1.title("About")
        self.root1.protocol('WM_DELETE_WINDOW', self.destructor1)
        self.root1.geometry("900x900")

        self.tx = tk.Label(self.root1)
        self.tx.place(x=360, y=40)
        self.tx.config(text="Efforts By", font=("Courier", 20, "bold"))

        self.photo1 = tk.PhotoImage(file='Pictures/chiranjit.png')
        self.w1 = tk.Label(self.root1, image=self.photo1)
        self.w1.place(x=170, y=105)
        self.tx6 = tk.Label(self.root1)
        self.tx6.place(x=170, y=310)
        self.tx6.config(text="Chiranjit\n170130103093",
                        font=("Courier", 15, "bold"))

        self.photo2 = tk.PhotoImage(file='Pictures/mitesh.png')
        self.w2 = tk.Label(self.root1, image=self.photo2)
        self.w2.place(x=380, y=105)
        self.tx2 = tk.Label(self.root1)
        self.tx2.place(x=380, y=310)
        self.tx2.config(text="Mitesh\n170130103115",
                        font=("Courier", 15, "bold"))

        self.photo3 = tk.PhotoImage(file='Pictures/harshil.png')
        self.w3 = tk.Label(self.root1, image=self.photo3)
        self.w3.place(x=590, y=105)
        self.tx3 = tk.Label(self.root1)
        self.tx3.place(x=590, y=310)
        self.tx3.config(text="Harshil\n170130103092",
                        font=("Courier", 15, "bold"))

        self.tx7 = tk.Label(self.root1)
        self.tx7.place(x=220, y=380)
        self.tx7.config(text="Under the supervision of",
                        font=("Courier", 20, "bold"))

        self.photo6 = tk.PhotoImage(file='Pictures/sir.png')
        self.w6 = tk.Label(self.root1, image=self.photo6)
        self.w6.place(x=380, y=430)
        self.tx6 = tk.Label(self.root1)
        self.tx6.place(x=230, y=640)
        self.tx6.config(text="Prof. Manan M. Nanavati",
                        font=("Courier", 20, "bold"))
예제 #19
0
class CyHunspell():
    '''
    Spell checker built on the cython (CyHunspell) binding of hunspell.

    >>> word_en = 'cookbok'
    >>> word_ru = 'поваринная'
    >>> speller_en = CyHunspell(lang="en")
    >>> speller_en.spell(word_en)
    False
    >>> speller_en.suggest(word_en)
    ('cookbook', 'copybook', 'codebook', 'Cook', 'cook')
    >>> speller_en.replace(word_en)
    'cookbook'
    >>> speller_ru = CyHunspell(lang="ru")
    >>> speller_ru.spell(word_ru)
    False
    >>> speller_ru.suggest(word_ru)
    ('поваренная',)
    >>> speller_ru.replace(word_ru)
    'поваренная'
    '''

    # Map short language codes to full Hunspell dictionary names.
    langs = {'ru': 'ru_RU', 'en': 'en_US'}

    def __init__(
        self,
        lang='en',
        max_dist=2,
        cpu=None,
        # cache_manager="hunspell",disk_cache_dir=None,
        # hunspell_data_dir=None,system_encoding=None
        spell_kwargs=None):
        """
        :param lang: short code ('en', 'ru') or a full dictionary name
        :param max_dist: maximum edit distance replace() accepts for a fix
        :param cpu: worker count for bulk operations; defaults to the number
            of CPUs available at call time
        :param spell_kwargs: extra keyword arguments forwarded to Hunspell
        """
        self.lang = self.langs.get(lang, lang)
        # `spell_kwargs or {}` avoids the shared-mutable-default pitfall
        # of the previous `spell_kwargs={}` signature.
        self.spell_dict = Hunspell(self.lang, **(spell_kwargs or {}))
        self.max_dist = max_dist
        # Resolve the worker count at call time; os.cpu_count() may return
        # None on some platforms, so fall back to a single worker.
        if cpu is None:
            cpu = os.cpu_count() or 1
        self.spell_dict.set_concurrency(cpu)

    def spell(self, word):
        """Return hunspell's True/False verdict, or None if the word cannot
        be encoded for the underlying dictionary."""
        try:
            result = self.spell_dict.spell(word)
        except UnicodeEncodeError:
            result = None
        return result

    def suggest(self, word):
        """Return a tuple of suggestions; empty tuple on encoding failure."""
        try:
            result = self.spell_dict.suggest(word)
        except UnicodeEncodeError:
            result = tuple()
        return result

    def replace(self, word, max_dist=None):
        """Return the top suggestion when *word* is misspelled and the
        suggestion is within *max_dist* edits; otherwise return *word*.

        :param max_dist: per-call override of the instance default
        """
        max_dist = max_dist if max_dist is not None else self.max_dist

        if self.spell(word):
            return word
        suggestions = self.suggest(word)
        if (suggestions and edit_distance(word, suggestions[0]) <= max_dist):
            return suggestions[0]
        else:
            return word
예제 #20
0
class HunspellTest(unittest.TestCase):
    """Exercise the Hunspell wrapper against the bundled 'test' dictionary."""

    def setUp(self):
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)

    def tearDown(self):
        # The handle may already have been dropped by a test body.
        if hasattr(self, 'h'):
            del self.h

    def assertAllIn(self, checked, expected):
        # Fail unless every element of `checked` appears in `expected`.
        missing = [item for item in checked if item not in expected]
        self.assertTrue(not missing,
                        u"{} not all found in {}".format(checked, expected))

    def test_create_destroy(self):
        # Deleting the handle must not raise.
        del self.h

    def test_missing_dict(self):
        # An unavailable dictionary raises IOError at construction time.
        with self.assertRaises(IOError):
            Hunspell('not_avail', hunspell_data_dir=DICT_DIR)

    def test_spell(self):
        self.assertFalse(self.h.spell('dpg'))
        self.assertTrue(self.h.spell('dog'))

    def test_spell_utf8(self):
        self.assertTrue(self.h.spell(u'café'))
        self.assertFalse(self.h.spell(u'uncafé'))

    def test_spell_empty(self):
        # The empty string counts as correctly spelled.
        self.assertTrue(self.h.spell(''))

    def test_suggest(self):
        suggestions = self.h.suggest('dpg')
        self.assertIsInstance(suggestions, tuple)
        self.assertAllIn(
            ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg'),
            suggestions)

    def test_suggest_utf8(self):
        for variant in ('cefé', u'cefé'):
            suggestions = self.h.suggest(variant)
            self.assertIsInstance(suggestions, tuple)
            self.assertAllIn((u'café', u'Cerf'), suggestions)

    def test_suggest_empty(self):
        self.assertEqual(self.h.suggest(''), ())

    def test_stem(self):
        for word, stems in (('dog', ('dog', )),
                            ('permanently', ('permanent', ))):
            self.assertEqual(self.h.stem(word), stems)

    def test_bulk_suggest(self):
        self.h.set_concurrency(3)
        suggest = self.h.bulk_suggest(['dog', 'dpg'])
        self.assertEqual(sorted(suggest.keys()), ['dog', 'dpg'])
        self.assertIsInstance(suggest['dog'], tuple)
        self.assertAllIn(('dog', ), suggest['dog'])

        self.assertIsInstance(suggest['dpg'], tuple)
        self.assertAllIn(
            ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg'),
            suggest['dpg'])

        words = ['bjn', 'dog', 'dpg', 'dyg', 'foo', 'frg', 'opg', 'pgg',
                 'qre', 'twg']
        self.assertEqual(sorted(self.h.bulk_suggest(words).keys()), words)

    def test_bulk_stem(self):
        self.h.set_concurrency(3)
        self.assertDictEqual(
            self.h.bulk_stem(['dog', 'permanently']),
            {'permanently': ('permanent', ), 'dog': ('dog', )})
        expected = {
            'unrecorded': ('recorded', ),
            'permanently': ('permanent', ),
            'twigs': ('twig', ),
            'dog': ('dog', )
        }
        self.assertDictEqual(
            self.h.bulk_stem(['dog', 'twigs', 'permanently', 'unrecorded']),
            expected)
예제 #21
0
class HunspellTest(unittest.TestCase):
    """Tests for spell/suggest/stem and their bulk variants on the 'test' dict."""

    def setUp(self):
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)

    def tearDown(self):
        try:
            del self.h
        except AttributeError:
            # Already removed (e.g. by test_create_destroy).
            pass

    def assertAllIn(self, checked, expected):
        # Every member of `checked` must occur somewhere in `expected`.
        ok = all(member in expected for member in checked)
        self.assertTrue(ok, u"{} not all found in {}".format(checked, expected))

    def test_create_destroy(self):
        del self.h

    def test_missing_dict(self):
        with self.assertRaises(IOError):
            Hunspell('not_avail', hunspell_data_dir=DICT_DIR)

    def test_spell(self):
        self.assertFalse(self.h.spell('dpg'))
        self.assertTrue(self.h.spell('dog'))

    def test_spell_utf8(self):
        self.assertTrue(self.h.spell(u'café'))
        self.assertFalse(self.h.spell(u'uncafé'))

    def test_spell_empty(self):
        self.assertTrue(self.h.spell(''))

    def test_suggest(self):
        wanted = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        result = self.h.suggest('dpg')
        self.assertIsInstance(result, tuple)
        self.assertAllIn(wanted, result)

    def test_suggest_utf8(self):
        wanted = (u'café', u'Cerf')
        for spelling in ('cefé', u'cefé'):
            result = self.h.suggest(spelling)
            self.assertIsInstance(result, tuple)
            self.assertAllIn(wanted, result)

    def test_suggest_empty(self):
        self.assertEqual(self.h.suggest(''), ())

    def test_stem(self):
        self.assertEqual(self.h.stem('dog'), ('dog',))
        self.assertEqual(self.h.stem('permanently'), ('permanent',))

    def test_bulk_suggest(self):
        self.h.set_concurrency(3)
        result = self.h.bulk_suggest(['dog', 'dpg'])
        self.assertEqual(sorted(result.keys()), ['dog', 'dpg'])
        self.assertIsInstance(result['dog'], tuple)
        self.assertAllIn(('dog',), result['dog'])

        wanted = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        self.assertIsInstance(result['dpg'], tuple)
        self.assertAllIn(wanted, result['dpg'])

        queries = ['bjn', 'dog', 'dpg', 'dyg', 'foo', 'frg', 'opg', 'pgg',
                   'qre', 'twg']
        result = self.h.bulk_suggest(queries)
        self.assertEqual(sorted(result.keys()), queries)

    def test_bulk_stem(self):
        self.h.set_concurrency(3)
        two = {'permanently': ('permanent',), 'dog': ('dog',)}
        self.assertDictEqual(self.h.bulk_stem(['dog', 'permanently']), two)
        four = {
            'unrecorded': ('recorded',),
            'permanently': ('permanent',),
            'twigs': ('twig',),
            'dog': ('dog',)
        }
        self.assertDictEqual(
            self.h.bulk_stem(['dog', 'twigs', 'permanently', 'unrecorded']),
            four)
예제 #22
0
class Stem():
    """The Stem class deals with various tasks as follows:
        - spell error detection and correction
        - morphological analysis
        - stemming

        These tasks are carried out in the `Kurdish Hunspell project <https://github.com/sinaahmadi/KurdishHunspell>`_.

    """
    def __init__(self, dialect, script):
        # Maps raw Hunspell flag names (as they appear in analyze() output)
        # to the human-readable keys used in the dictionaries this class returns.
        self.hunspell_flags = {
            "po": "pos",
            "is": "description",
            "ts": "terminal_suffix",
            "ds": "formation"
        }
        # Only the Sorani dialect in Arabic script is currently shipped with klpt.
        if dialect == "Sorani" and script == "Arabic":
            self.huns = Hunspell("ckb-Arab",
                                 hunspell_data_dir=klpt.get_data("data/"))
        else:
            raise Exception(
                "Sorry, only Sorani dialect in the Arabic script is supported now. Stay tuned for other dialects and scripts!"
            )

    # def stem(self, word):
    #     """A function for stemming a single word"""
    #     pass

    # def lemmatize(self, word):
    #     """A function for lemmatization of a single word"""
    #     pass

    def check_spelling(self, word):
        """Check spelling of a word

        Args:
            word (str): input word to be spell-checked

        Raises:
            TypeError: only string as input

        Returns:
            bool: True if the spelling is correct, False if the spelling is incorrect
        """
        if not isinstance(word, str):
            raise TypeError("Only a word (str) is allowed.")
        else:
            return self.huns.spell(word)

    def correct_spelling(self, word):
        """Correct spelling errors if the input word is incorrect

        Args:
            word (str): input word to be spell-checked

        Raises:
            TypeError: only string as input

        Returns:
            tuple (boolean, list): a tuple where the first element indicates the correctness of the word (True if correct, False if incorrect).
            If the input word is incorrect, suggestions are provided in a list as the second element of the tuple, as (False, []).
            If no suggestion is available, the list is returned empty as (True, []).
        """
        if not isinstance(word, str):
            raise TypeError("Only a word (str) is allowed.")
        else:
            # Correct words need no suggestions; incorrect ones get Hunspell's list.
            if self.check_spelling(word):
                return (True, [])
            return (False, list(self.huns.suggest(word)))

    def analyze(self, word_form):
        """Morphological analysis of a given word
        More details regarding Kurdish morphological analysis can be found at https://github.com/sinaahmadi/KurdishHunspell

        Args:
            word_form (str): a single word-form

        Raises:
            TypeError: only string as input

        Returns:
            (list(dict)): a list of all possible morphological analyses according to the defined morphological rules
            
            The morphological analysis is returned as a dictionary as follows:
             - "pos": the part-of-speech of the word-form according to `the Universal Dependency tag set <https://universaldependencies.org/u/pos/index.html>`_ 
             - "description": is flag
             - "terminal_suffix": anything except ts flag
             - "formation": if ds flag is set, its value is assigned to description and the value of formation is set to derivational. Although the majority of our morphological rules cover inflectional forms, it is not accurate to say all of them are inflectional. Therefore, we only set this value to derivational wherever we are sure.
             - "base": `ts` flag. The definition of terminal suffix is a bit tricky in Hunspell. According to `the Hunspell documentation <http://manpages.ubuntu.com/manpages/trusty/en/man4/hunspell.4.html>`_, "Terminal suffix fields are inflectional suffix fields "removed" by additional (not terminal) suffixes". In other words, the ts flag in Hunspell represents whatever is left after stripping all affixes. Therefore, it is the morphological base.

            If the input cannot be analyzed morphologically, an empty list is returned.
        """
        if not isinstance(word_form, str):
            raise TypeError("Only a word (str) is allowed.")
        else:
            # Given the morphological analysis of a word-form with Hunspell flags, extract relevant information and return a dictionary
            # NOTE(review): each `analysis` is presumably a whitespace-separated
            # string of key:value flag tokens from Hunspell — confirm against
            # the cyhunspell analyze() output format.
            word_analysis = list()
            for analysis in list(self.huns.analyze(word_form)):
                analysis_dict = dict()
                for item in analysis.split():
                    # Ignore tokens that are not key:value pairs.
                    if ":" not in item:
                        continue
                    if item.split(":")[1] == "ts":
                        # ts flag exceptionally appears after the value as value:key in the Hunspell output
                        analysis_dict["base"] = item.split(":")[0]
                        # anything except the terminal_suffix is considered to be the base
                        analysis_dict[self.hunspell_flags[item.split(
                            ":")[1]]] = word_form.replace(
                                item.split(":")[0], "")
                    elif item.split(":")[0] in self.hunspell_flags.keys():
                        # assign the key:value pairs from the Hunspell string output to the dictionary output of the current function
                        # for ds flag, add derivation as the formation type, otherwise inflection
                        if item.split(":")[0] == "ds":
                            analysis_dict[self.hunspell_flags[item.split(
                                ":")[0]]] = "derivational"
                            analysis_dict[
                                self.hunspell_flags["is"]] = item.split(":")[1]
                        else:
                            # Later occurrences of the same flag overwrite earlier ones.
                            analysis_dict[self.hunspell_flags[item.split(
                                ":")[0]]] = item.split(":")[1]

                # if there is no value assigned to the ts flag, the terminal suffix is a zero-morpheme 0
                if self.hunspell_flags[
                        "ts"] not in analysis_dict or analysis_dict[
                            self.hunspell_flags["ts"]] == "":
                    analysis_dict[self.hunspell_flags["ts"]] = "0"

                word_analysis.append(analysis_dict)

        return word_analysis