def _testPhrase(self, language, phrase):
    """Assert that `phrase` is valid for `language` and converts consistently.

    The phrase must be accepted by ``Phrases.forLanguage(language).isPhrase``.
    It is then converted to a number with that language, and for every
    language reported by ``Phrases.detectLanguages(phrase)`` two things are
    asserted: converting the phrase yields the same number, and converting
    the number back yields the same phrase.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    assert Phrases.forLanguage(language).isPhrase(phrase), (
        "phrase='%s' language=%s" % (phrase, language)
    )

    expectedNumber = Phrases.forLanguage(language).toNumber(phrase)

    for candidate in Phrases.detectLanguages(phrase):
        # Every detected language has to agree in both directions.
        candidateNumber = Phrases.forLanguage(candidate).toNumber(phrase)
        candidatePhrase = Phrases.forLanguage(candidate).toPhrase(expectedNumber)
        assert expectedNumber == candidateNumber
        assert phrase == candidatePhrase
def _testPhrases(self, language):
    """Run `_testPhrase` over generated phrases for `language`.

    Two families of phrases are checked:
      * the phrase produced by ``toPhrase`` for every number in
        ``range(100000)``;
      * one phrase built by ``self.mkPhrase`` for each length from 1 to 19.
    """
    # Phrases derived from consecutive numbers.
    for value in range(100000):
        self._testPhrase(
            language, Phrases.forLanguage(language).toPhrase(value)
        )

    # Phrases assembled by mkPhrase, one per word count.
    for wordCount in range(1, 20):
        self._testPhrase(language, self.mkPhrase(language, wordCount))
def number(self, phrase):
    """Convert `phrase` to its number.

    The phrase is first passed through ``Check.toString``. If the phrases
    object returned by ``self._getPhrases()`` accepts it, that object does
    the conversion. Otherwise ``Phrases.detectLanguages`` is consulted and
    the language obtained from ``pop()`` on its result is used.

    Raises:
        ValueError: if no language is detected for the phrase.
    """
    text = Check.toString(phrase)

    own = self._getPhrases()
    if own.isPhrase(text):
        return own.toNumber(text)

    candidates = Phrases.detectLanguages(text)
    if len(candidates) == 0:
        raise ValueError("unknown phrase language")

    # pop() removes and returns one of the detected languages.
    return Phrases.forLanguage(candidates.pop()).toNumber(text)
def testAmbiguity(self):
    """Fail if any word sits at different indexes in different languages.

    Builds a mapping ``word -> index -> {languages}`` from the ``words`` and
    ``invWords`` of every language returned by ``Phrases.getLanguages()``.
    A word that ends up with more than one index is ambiguous; each such
    (word, language, index) combination is printed before the final
    assertion fails.

    Raises:
        AssertionError: if at least one ambiguous word was found.
    """
    # word -> index -> set of languages that place the word at that index
    allWords = dict()
    for language in Phrases.getLanguages():
        phrases = Phrases.forLanguage(language)
        for word in phrases.words:
            index = phrases.invWords[word]
            allWords.setdefault(word, dict()).setdefault(index, set()).add(language)

    ambiguous = False
    for word, indexs in allWords.items():
        if len(indexs) <= 1:
            continue
        ambiguous = True
        for index, langs in indexs.items():
            for lang in langs:
                # Argument order must match the format: the language goes to
                # %s and the index to %d. The previous order passed the
                # language to %d, which raised TypeError and hid the report.
                print("word %s in %s index %d" % (word, lang, index))
    assert not ambiguous
def mkPhrase(self, language, length):
    """Build a phrase of `length` words drawn from `language`'s word list.

    Each word is chosen by indexing the language's ``words`` with
    ``self.rng.next(len(words))``; the chosen words are joined with the
    separator returned by the language's ``space()``.
    """
    phrases = Phrases.forLanguage(language)
    vocabulary = phrases.words

    picked = []
    for _ in range(length):
        picked.append(vocabulary[self.rng.next(len(vocabulary))])

    separator = phrases.space()
    return separator.join(picked)
def _getPhrases(self):
    """Return ``Phrases.forLanguage`` for the language from ``self.getLanguage()``."""
    currentLanguage = self.getLanguage()
    return Phrases.forLanguage(currentLanguage)
# Put the parent directory first on sys.path so the `phrases` module below
# can be imported from there.
# NOTE(review): `currentdir` is defined before this excerpt — confirm.
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from phrases import Phrases

# Languages processed by the loop below, in this order.
languages = [
    "ab", "chinese_simplified", "chinese_traditional", "english", "french",
    "italian", "japanese", "korean", "spanish"
]

# word -> index at which that word is reserved.
# NOTE(review): nothing in this excerpt adds entries; presumably it is filled
# further down in the loop — confirm.
reserved = dict()

for el1 in range(len(languages)):
    language = languages[el1]
    phrases = Phrases.forLanguage(language)
    # Work on a copy: entries are overwritten with None below.
    words = phrases.words[:]
    # New ordering for this language, one slot per word, initially empty.
    sequence = [None for i in range(len(words))]
    # reuses: at least one word of this language was already reserved.
    reuses = False
    # changed: at least one reserved word sits at a different index than
    # the one phrases.invWords gives for it.
    changed = False
    # put already reserved words at their index
    for i in range(len(words)):
        word = words[i]
        if word == None:
            continue
        if word in reserved:
            # Mark the word as handled in the working copy.
            words[i] = None
            if phrases.invWords[word] != reserved[word]:
                changed = True
            # Place the word at its reserved index in the new sequence.
            sequence[reserved[word]] = word
            reuses = True