Example #1
0
 def test_lookup_max_edit_distance_too_large(self):
     """A lookup with distance 3 on SymSpell(2, 7, 10) raises ValueError."""
     checker = SymSpell(2, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         checker.create_dictionary_entry(term, count)
     # Only the exception matters here; the lookup result is discarded.
     with pytest.raises(ValueError) as raised:
         checker.lookup("flam", Verbosity.TOP, 3)
     self.assertEqual("Distance too large", str(raised.value))
Example #2
0
    def test_create_dictionary_entry_negative_count(self):
        """Counts 0 and -1 are refused by default; 0 passes with count_threshold=0."""
        strict = SymSpell(1, 3)
        for rejected_count in (0, -1):
            self.assertEqual(
                False, strict.create_dictionary_entry("pipe", rejected_count))

        lenient = SymSpell(1, 3, count_threshold=0)
        self.assertEqual(True, lenient.create_dictionary_entry("pipe", 0))
Example #3
0
 def test_lookup_should_not_return_non_word_delete(self):
     """Fragments of "pawn" that were never added give no hits at distance 0."""
     checker = SymSpell(2, 7, 10)
     checker.create_dictionary_entry("pawn", 10)
     for fragment in ("paw", "awn"):
         hits = checker.lookup(fragment, Verbosity.TOP, 0)
         self.assertEqual(0, len(hits))
Example #4
0
 def test_lookup_should_not_return_low_count_word_that_are_also_delete_word(
         self):
     """An exact lookup of "flam" (added with count 1) expects no suggestion."""
     checker = SymSpell(2, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup("flam", Verbosity.TOP, 0)
     self.assertEqual(0, len(hits))
Example #5
0
 def test_lookup_include_unknown(self):
     """With the fourth lookup argument True, "flam" itself comes back."""
     checker = SymSpell(2, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup("flam", Verbosity.TOP, 0, True)
     self.assertEqual(1, len(hits))
     only_hit = hits[0]
     self.assertEqual("flam", only_hit.term)
Example #6
0
    def test_words_with_shared_prefix_should_retain_counts(self):
        """"pipe" and "pips" keep their own counts in every lookup ordering."""
        checker = SymSpell(1, 3)
        checker.create_dictionary_entry("pipe", 5)
        checker.create_dictionary_entry("pips", 10)

        # (looked-up phrase, expected (term, count) pairs in result order)
        scenarios = (
            ("pipe", (("pipe", 5), ("pips", 10))),
            ("pips", (("pips", 10), ("pipe", 5))),
            ("pip", (("pips", 10), ("pipe", 5))),
        )
        for phrase, expected in scenarios:
            hits = checker.lookup(phrase, Verbosity.ALL, 1)
            self.assertEqual(2, len(hits))
            for position, (term, count) in enumerate(expected):
                self.assertEqual(term, hits[position].term)
                self.assertEqual(count, hits[position].count)
Example #7
0
 def test_lookup_should_find_exact_match(self):
     """Looking up "streama" with TOP verbosity yields only "steama"."""
     checker = SymSpell()
     for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup("streama", Verbosity.TOP, 2)
     self.assertEqual(1, len(hits))
     self.assertEqual("steama", hits[0].term)
Example #8
0
    def test_add_additional_counts_should_not_add_word_again(self):
        """Re-adding the same word leaves word_count at 1."""
        checker = SymSpell()
        word = "hello"
        for added_count in (11, 3):
            checker.create_dictionary_entry(word, added_count)
            self.assertEqual(1, checker.word_count)
Example #9
0
 def test_lookup_include_unknown(self):
     """With the fourth lookup argument True, unknown "qwer" is echoed back."""
     # Announce the running test by its own function name.
     current_name = inspect.stack()[0][3]
     print('  - %s' % current_name)
     checker = SymSpell(16, 2, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup("qwer", Verbosity.TOP, 0, True)
     self.assertEqual(1, len(hits))
     self.assertEqual("qwer", hits[0].term)
Example #10
0
 def test_lookup_should_not_return_non_word_delete(self):
     """Fragments of "pawn" that were never added give no hits at distance 0."""
     # Announce the running test by its own function name.
     current_name = inspect.stack()[0][3]
     print('  - %s' % current_name)
     checker = SymSpell(16, 2, 7, 10)
     checker.create_dictionary_entry("pawn", 10)
     for fragment in ("paw", "awn"):
         hits = checker.lookup(fragment, Verbosity.TOP, 0)
         self.assertEqual(0, len(hits))
Example #11
0
    def test_create_dictionary_entry_negative_count(self):
        """Counts 0 and -1 are refused by default; 0 passes with count_threshold=0."""
        # Announce the running test by its own function name.
        current_name = inspect.stack()[0][3]
        print('  - %s' % current_name)
        strict = SymSpell(16, 1, 3)
        for rejected_count in (0, -1):
            self.assertEqual(
                False, strict.create_dictionary_entry("pipe", rejected_count))

        lenient = SymSpell(16, 1, 3, count_threshold=0)
        self.assertEqual(True, lenient.create_dictionary_entry("pipe", 0))
Example #12
0
def test_lookup_transfer_casing_symspellpy(benchmark):
    """Benchmark a casing-transferring lookup of "StreaM" against "steam"."""
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.create_dictionary_entry("steam", 4)
    positional = ("StreaM", VerbosityPy.TOP, 2)
    # The benchmark fixture calls lookup with these arguments and hands back
    # its return value.
    suggestions = benchmark(checker.lookup, *positional, transfer_casing=True)
    assert suggestions[0].term == "SteaM"
Example #13
0
    def test_add_additional_counts_should_not_add_word_again(self):
        """Re-adding the same word leaves word_count at 1."""
        # Announce the running test by its own function name.
        current_name = inspect.stack()[0][3]
        print('  - %s' % current_name)
        checker = SymSpell()
        word = "hello"
        for added_count in (11, 3):
            checker.create_dictionary_entry(word, added_count)
            self.assertEqual(1, checker.word_count)
Example #14
0
 def test_lookup_should_return_most_frequent(self):
     """For "stream", TOP verbosity picks "steamb", the entry with count 6."""
     checker = SymSpell()
     for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup("stream", Verbosity.TOP, 2)
     self.assertEqual(1, len(hits))
     best = hits[0]
     self.assertEqual("steamb", best.term)
     self.assertEqual(6, best.count)
Example #15
0
 def test_lookup_avoid_exact_match_early_exit(self):
     """A phrase matching ignore_token ("24th") is returned as the only hit."""
     max_distance = 2
     checker = SymSpell(max_distance, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup(
         "24th",
         Verbosity.ALL,
         max_distance,
         ignore_token=r"\d{2}\w*\b",
     )
     self.assertEqual(1, len(hits))
     self.assertEqual("24th", hits[0].term)
Example #16
0
 def test_deletes(self):
     """After adding entries and one lookup, `deletes` is non-empty."""
     checker = SymSpell()
     for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
         checker.create_dictionary_entry(term, count)
     hits = checker.lookup("stream", Verbosity.TOP, 2)
     self.assertEqual(1, len(hits))
     best = hits[0]
     self.assertEqual("steamb", best.term)
     self.assertEqual(6, best.count)
     delete_count = len(checker.deletes)
     self.assertTrue(delete_count)
Example #17
0
    def test_lookup_compound_no_suggestion(self):
        """lookup_compound returns the input unchanged when nothing matches."""
        max_distance, prefix_len = 2, 7
        checker = SymSpell(max_distance, prefix_len)
        for term in ("steam", "machine"):
            checker.create_dictionary_entry(term, 1)

        garbled = "qwer erty ytui a"
        suggestions = checker.lookup_compound(garbled, max_distance)
        self.assertEqual(1, len(suggestions))
        self.assertEqual(garbled, suggestions[0].term)
Example #18
0
    def test_add_additional_counts_should_not_overflow(self):
        """Adding 11 to a count of sys.maxsize - 10 is expected to give sys.maxsize."""
        checker = SymSpell()
        word = "hello"
        # (count passed to create_dictionary_entry, count expected afterwards)
        steps = (
            (sys.maxsize - 10, sys.maxsize - 10),
            (11, sys.maxsize),
        )
        for added, expected in steps:
            checker.create_dictionary_entry(word, added)
            hits = checker.lookup(word, Verbosity.TOP)
            # Anything other than exactly one suggestion is treated as 0.
            observed = hits[0].count if len(hits) == 1 else 0
            self.assertEqual(expected, observed)
Example #19
0
    def test_add_additional_counts_should_increase_count(self):
        """Re-adding a word accumulates its count (11, then 11 + 3)."""
        checker = SymSpell()
        word = "hello"
        # (count passed to create_dictionary_entry, count expected afterwards)
        steps = ((11, 11), (3, 11 + 3))
        for added, expected in steps:
            checker.create_dictionary_entry(word, added)
            hits = checker.lookup(word, Verbosity.TOP)
            # Anything other than exactly one suggestion is treated as 0.
            observed = hits[0].count if len(hits) == 1 else 0
            self.assertEqual(expected, observed)
Example #20
0
    def test_lookup_compound_only_combi(self):
        """lookup_compound turns "ste am machie" into "steam machine"."""
        max_distance, prefix_len = 2, 7
        checker = SymSpell(max_distance, prefix_len)
        for term in ("steam", "machine"):
            checker.create_dictionary_entry(term, 1)

        garbled, expected = "ste am machie", "steam machine"
        suggestions = checker.lookup_compound(garbled, max_distance)
        self.assertEqual(1, len(suggestions))
        self.assertEqual(expected, suggestions[0].term)
Example #21
0
    def test_lookup_transfer_casing(self):
        """transfer_casing=True copies the input's casing onto the suggestion."""
        # (dictionary entry, looked-up phrase, expected suggestion)
        scenarios = (
            ("steam", "Stream", "Steam"),
            ("steam", "StreaM", "SteaM"),
            ("steam", "STREAM", "STEAM"),
            ("i", "I", "I"),
        )
        for entry, phrase, expected in scenarios:
            # Each scenario runs against a fresh single-entry dictionary.
            checker = SymSpell()
            checker.create_dictionary_entry(entry, 4)
            hits = checker.lookup(
                phrase, Verbosity.TOP, 2, transfer_casing=True)
            self.assertEqual(expected, hits[0].term)
class NameChecker(object):
    """Correct names against a fixed list using SymSpell lookups."""

    def __init__(self, name_list):
        """Index every name in `name_list`.

        Each name is stored with a count equal to the number of
        space-separated parts it contains.
        """
        self.sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
        for candidate in name_list:
            part_count = len(candidate.split(' '))
            self.sym_spell.create_dictionary_entry(candidate, part_count)

    def get_name(self, name):
        """Return the first CLOSEST suggestion for `name`, or `name` itself if none."""
        matches = self.sym_spell.lookup(
            name,
            Verbosity.CLOSEST,
            max_edit_distance=2,
            transfer_casing=True,
        )
        if matches is None or len(matches) == 0:
            return name
        return matches[0].term
Example #23
0
    def test_create_dictionary_entry_below_threshold(self):
        """Counts accumulate in below_threshold_words until the entry leaves it."""
        checker = SymSpell(1, 3, count_threshold=10)

        # The first two additions (4, then 8 in total) are tracked as
        # below-threshold.
        for running_total in (4, 8):
            checker.create_dictionary_entry("pipe", 4)
            self.assertEqual(1, len(checker.below_threshold_words))
            self.assertEqual(
                running_total, checker.below_threshold_words["pipe"])

        # After the third addition the word is no longer tracked there.
        checker.create_dictionary_entry("pipe", 4)
        self.assertEqual(0, len(checker.below_threshold_words))
Example #24
0
    def test_verbosity_should_control_lookup_results(self):
        """TOP, CLOSEST and ALL return 1, 2 and 3 suggestions for "steems"."""
        checker = SymSpell()
        for term, count in (("steam", 1), ("steams", 2), ("steem", 3)):
            checker.create_dictionary_entry(term, count)

        expectations = (
            (Verbosity.TOP, 1),
            (Verbosity.CLOSEST, 2),
            (Verbosity.ALL, 3),
        )
        for verbosity, expected_len in expectations:
            hits = checker.lookup("steems", verbosity, 2)
            self.assertEqual(expected_len, len(hits))
Example #25
0
    def test_create_dictionary_entry_below_threshold(self):
        """Counts accumulate in below_threshold_words until the entry leaves it."""
        # Announce the running test by its own function name.
        current_name = inspect.stack()[0][3]
        print('  - %s' % current_name)
        checker = SymSpell(16, 1, 3, count_threshold=10)

        # The first two additions (4, then 8 in total) are tracked as
        # below-threshold.
        for running_total in (4, 8):
            checker.create_dictionary_entry("pipe", 4)
            self.assertEqual(1, len(checker.below_threshold_words))
            self.assertEqual(
                running_total, checker.below_threshold_words["pipe"])

        # After the third addition the word is no longer tracked there.
        checker.create_dictionary_entry("pipe", 4)
        self.assertEqual(0, len(checker.below_threshold_words))
Example #26
0
    def test_delete_dictionary_entry_invalid_word(self):
        """Deleting a word that was never added reports failure and changes nothing."""
        checker = SymSpell()
        for term, count in (("stea", 1), ("steama", 2), ("steem", 3)):
            checker.create_dictionary_entry(term, count)

        def assert_steama_intact():
            # "steama" is still the single TOP hit and still the longest word.
            hits = checker.lookup("steama", Verbosity.TOP, 2)
            self.assertEqual(1, len(hits))
            self.assertEqual("steama", hits[0].term)
            self.assertEqual(len("steama"), checker._max_length)

        assert_steama_intact()
        self.assertFalse(checker.delete_dictionary_entry("steamab"))
        assert_steama_intact()
Example #27
0
def spell_checker(corr_list):
    """Build a SymSpell instance holding every word of `corr_list` with count 1."""
    checker = SymSpell(max_dictionary_edit_distance=2)
    add_entry = checker.create_dictionary_entry
    for entry in corr_list:
        add_entry(entry, 1)
    return checker
Example #28
0
class MagicRecognition:
    """Turn OCR text boxes into a Magic decklist using fuzzy dictionary lookups."""

    def __init__(self, file_all_cards: str, file_keywords: str, languages=("English",), max_ratio_diff=0.3, max_ratio_diff_keyword=0.2) -> None:
        """Load dictionaries of cards and keywords

        Parameters
        ----------
        file_all_cards: str
            Path to the file containing all cards. If the file does not exist, it is downloaded from mtgjson.
        file_keywords: str
            Path to the file containing all keywords. If the file does not exist, it is downloaded from mtgjson.
        languages : iterable of str or str, optional
            Languages whose card names are written when `file_all_cards` has to be generated,
            by default ("English",). A single string is treated as one language.
        max_ratio_diff : float, optional
            Maximum ratio (distance/length) for a text to be considered as a card name, by default 0.3
        max_ratio_diff_keyword : float, optional
            Maximum ratio (distance/length) for a text to be considered as a (ignored) keyword, by default 0.2
        """
        self.max_ratio_diff = max_ratio_diff
        self.max_ratio_diff_keyword = max_ratio_diff_keyword

        # A bare string would be tested by substring in the `in` checks below.
        if isinstance(languages, str):
            languages = (languages,)

        if not Path(file_all_cards).is_file():
            def write_card(f, card):
                # Keep only the part of the name before " //".
                i = card.find(" //")
                if i != -1:
                    card = card[:i]
                # "<name>$1": term and count split by "$", as load_dictionary
                # is told to expect below.
                f.write(card + "$1\n")  # required for SymSpell

            all_cards_json = load_json(URL_ALL_CARDS)
            with Path(file_all_cards).open("a") as f:
                for card, printings in all_cards_json["data"].items():
                    if "English" in languages:
                        write_card(f, card)
                    for e in printings[0]["foreignData"]:
                        if e["language"] in languages:
                            write_card(f, e["name"])

        self.sym_all_cards = SymSpell(max_dictionary_edit_distance=6)
        self.sym_all_cards._distance_algorithm = editdistance.DistanceAlgorithm.LEVENSHTEIN
        self.sym_all_cards.load_dictionary(file_all_cards, 0, 1, separator="$")
        self.all_cards = self.sym_all_cards._words
        print(f"Loaded {file_all_cards}: {len(self.all_cards)} cards")
        self.edit_dist = editdistance.EditDistance(editdistance.DistanceAlgorithm.LEVENSHTEIN)

        if not Path(file_keywords).is_file():
            keywords = load_json(URL_KEYWORDS)
            # Close the handle before reading the file back just below.
            with Path(file_keywords).open("w") as f:
                json.dump(keywords, f)

        with Path(file_keywords).open() as f:
            keywords_json = json.load(f)
        # Flatten the per-category keyword lists into one list.
        keywords = [k for group in keywords_json["data"].values() for k in group]
        keywords.extend(["Display", "Land", "Search", "Profile"])
        self.sym_keywords = SymSpell(max_dictionary_edit_distance=3)
        for k in keywords:
            self.sym_keywords.create_dictionary_entry(k, 1)
        print(f"Loaded {file_keywords}: {len(keywords)} keywords")

    def _preprocess(self, text: str) -> str:
        """Remove characters which can't appear on a Magic card (OCR error)"""
        return re.sub("[^a-zA-Z',. ]", '', text).rstrip(' ')

    def _preprocess_texts(self, box_texts: BoxTextList) -> None:
        """Apply `preprocess` on each text"""
        for box_text in box_texts:
            box_text.text = self._preprocess(box_text.text)

    def box_texts_to_cards(self, box_texts: BoxTextList) -> BoxTextList:
        """Recognize cards from raw texts

        `box_texts` is sorted in place. Texts close enough to a known keyword
        are skipped; every other text is cleaned and searched as a card name.
        """
        box_texts.sort()
        box_cards = BoxTextList()
        for box, text, _ in box_texts:
            sug = self.sym_keywords.lookup(text,
                                           Verbosity.CLOSEST,
                                           max_edit_distance=min(3, int(self.max_ratio_diff_keyword * len(text))))
            if sug:
                logging.info(f"Keyword rejected: {text} {sug[0].distance/len(text)} {sug[0].term}")
                continue
            card = self._search(self._preprocess(text))
            if card is not None:
                box_cards.add(box, card)
        return box_cards

    def _assign_stacked(self, box_texts: BoxTextList, box_cards: BoxTextList) -> None:
        """Set multipliers (e.g. x4) for each (stacked) card in `box_cards`

        A two-character text made of a multiplication sign and a digit is read
        as a multiplier: "x4" picks its card with `comp_md`, "4x" with
        `comp_sb`. Nothing happens when `box_cards` is empty.

        Parameters
        ----------
        box_texts : BoxTextList
            BoxTextList containing potential multipliers
        box_cards : BoxTextList
            BoxTextList containing recognized cards
        """
        def _assign_stacked_one(box_cards: BoxTextList, m: int, comp) -> None:
            i_min = None
            for i, box_card in enumerate(box_cards):
                # The first card is the initial candidate; later ones replace
                # it only when `comp` prefers them.
                if i_min is None or comp(box_card.box, box_cards[i_min].box):
                    i_min = i
            if i_min is None:  # no recognized card to attach the multiplier to
                return
            box_cards[i_min].n = m
            logging.info(f"{box_cards[i_min].text} assigned to x{m}")

        def dist(p: tuple, q: tuple) -> float:
            # Squared Euclidean distance; enough for comparisons.
            return (p[0] - q[0])**2 + (p[1] - q[1])**2

        def comp_md(box1: tuple, box2: tuple, box: tuple) -> bool:
            # Reject box1 when either of its first two coordinates exceeds
            # the multiplier's.
            if box1[0] > box[0] or box1[1] > box[1]:
                return False
            return dist(box, box1) < dist(box, box2)

        def comp_sb(box1: tuple, box2: tuple, box: tuple) -> bool:
            return dist(box, box1) < dist(box, box2)

        comp = (comp_md, comp_sb)
        for box, text, _ in box_texts:
            if len(text) == 2:
                for i in [0, 1]:
                    # isdecimal() (unlike isnumeric()) only accepts characters
                    # that int() can convert, e.g. it rejects "²".
                    if text[i] in '×xX' and text[1 - i].isdecimal():
                        _assign_stacked_one(box_cards, int(text[1 - i]), partial(comp[i], box=box))

    def _box_cards_to_deck(self, box_cards: BoxTextList) -> Deck:
        """Convert recognized cards to decklist

        Cards are consumed in order: the first `max(60, total - 15)` copies go
        to the maindeck and the remaining copies to the sideboard.
        """
        def add_cards(c, deck, p):
            if c in deck.cards:
                deck.cards[c] += p
            elif p > 0:
                deck.cards[c] = p

        maindeck, sideboard = Pile(), Pile()
        n_cards = sum(c.n for c in box_cards)
        n_added = 0
        last_main_card = max(60, n_cards - 15)
        for _, card, n in box_cards:
            # Copies of this card that still fit in the maindeck (never negative).
            n_added_main = max(min(n, last_main_card - n_added), 0)
            add_cards(card, maindeck, n_added_main)
            add_cards(card, sideboard, n - n_added_main)
            n_added += n
        deck = Deck()
        deck.maindeck = maindeck
        deck.sideboard = sideboard
        return deck

    def box_texts_to_deck(self, box_texts: BoxTextList) -> Deck:
        """Convert raw texts to decklist

        Parameters
        ----------
        box_texts : BoxTextList
            Raw texts given by an OCR

        Returns
        -------
        Deck
            Decklist obtained from `box_texts`
        """
        box_cards = self.box_texts_to_cards(box_texts)
        self._assign_stacked(box_texts, box_cards)
        return self._box_cards_to_deck(box_cards)

    def _search(self, text):
        """If `text` can be recognized as a Magic card, return that card. Otherwise, return None."""
        if len(text) < 3:  # a card name is never that short
            return None
        if len(text) > 30:  # a card name is never that long
            logging.info(f"Too long: {text}")
            return None
        if text in self.all_cards:
            return text
        i = text.find("..")  # search for truncated card name
        if i != -1:
            # Compare only the first `i` characters of every known card and
            # keep the one with the strictly smallest distance below the bound.
            dist = int(self.max_ratio_diff * i)
            card = None
            for c in self.all_cards:
                d = self.edit_dist.compare(text[:i], c[:i], dist)
                if d != -1 and d < dist:
                    card = c
                    dist = d
            if card is None:
                logging.info(f"Not prefix: {text}")
            else:
                logging.info(f"Found prefix: {text} {dist/i} {card}")
                return card
        else:
            text = text.replace('.', '').rstrip(' ')
            sug = self.sym_all_cards.lookup(text,
                                            Verbosity.CLOSEST,
                                            max_edit_distance=min(6, int(self.max_ratio_diff * len(text))))
            if sug:
                card = sug[0].term
                ratio = sug[0].distance / len(text)
                # Reject a suggestion that is at least 7 characters shorter
                # than the text.
                if len(text) < len(card) + 7:
                    logging.info(f"Corrected: {text} {ratio} {card}")
                    return card
                logging.info(f"Not corrected (too long): {text} {ratio} {card}")
            else:
                logging.info(f"Not found: {text}")
        return None
Example #29
0
 def _create_spell_checker_dict(ents_counter, thres):
     """Build a SymSpell dictionary from the entries whose count exceeds `thres`."""
     checker = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
     frequent = (
         (entity, seen) for entity, seen in ents_counter.items() if seen > thres
     )
     for entity, seen in frequent:
         checker.create_dictionary_entry(entity, seen)
     return checker
Example #30
0
 def test_lookup_should_not_return_low_count_word(self):
     """An exact lookup of "pawn" (added with count 1) expects no suggestion."""
     checker = SymSpell(16, 2, 7, 10)
     checker.create_dictionary_entry("pawn", 1)
     hits = checker.lookup("pawn", Verbosity.TOP, 0)
     self.assertEqual(0, len(hits))