예제 #1
0
 def _testPhrase(self, language, phrase):
     """Check that `phrase` round-trips consistently across languages.

     Asserts that `phrase` is accepted by the Phrases object of `language`,
     converts it to a number there, and then, for every language returned
     by Phrases.detectLanguages(phrase), asserts that the phrase maps to
     the same number and that the number maps back to the same phrase.
     """
     source = Phrases.forLanguage(language)
     assert source.isPhrase(phrase), (
         "phrase='%s' language=%s" % (phrase, language))
     expected = source.toNumber(phrase)

     for candidate in Phrases.detectLanguages(phrase):
         other = Phrases.forLanguage(candidate)
         # Run both conversions before asserting on either result.
         decoded = other.toNumber(phrase)
         encoded = other.toPhrase(expected)
         assert expected == decoded
         assert phrase == encoded
예제 #2
0
    def _testPhrases(self, language):
        """Run self._testPhrase over generated phrases of `language`.

        First every number in range(100000) is turned into a phrase and
        checked; then one phrase built by self.mkPhrase is checked for each
        length from 1 to 19.
        """
        check = self._testPhrase

        # Phrases derived from consecutive numbers.
        for value in range(100000):
            check(language, Phrases.forLanguage(language).toPhrase(value))

        # Phrases produced by mkPhrase for each word count.
        for wordCount in range(1, 20):
            check(language, self.mkPhrase(language, wordCount))
예제 #3
0
    def number(self, phrase):
        """Convert `phrase` to its number.

        The phrase is first normalised with Check.toString. If the Phrases
        object from self._getPhrases() accepts it, that object does the
        conversion. Otherwise one of the languages reported by
        Phrases.detectLanguages is used.

        Raises:
            ValueError: if no language is detected for the phrase.
        """
        text = Check.toString(phrase)

        own = self._getPhrases()
        if own.isPhrase(text):
            return own.toNumber(text)

        candidates = Phrases.detectLanguages(text)
        if len(candidates) == 0:
            raise ValueError("unknown phrase language")

        # Whichever detected language pop() yields is used for the conversion.
        return Phrases.forLanguage(candidates.pop()).toNumber(text)
예제 #4
0
    def testAmbiguity(self):
        """Assert that no word has different indexes in different languages.

        Builds a mapping word -> index -> set of languages from every
        language returned by Phrases.getLanguages(). A word recorded under
        more than one index is ambiguous; each (word, language, index)
        combination of such a word is printed before the final assertion
        fails.
        """
        # word -> {index -> set of languages that hold the word at that index}
        allWords = dict()
        for language in Phrases.getLanguages():
            phrases = Phrases.forLanguage(language)
            for word in phrases.words:
                index = phrases.invWords[word]
                byIndex = allWords.setdefault(word, dict())
                byIndex.setdefault(index, set()).add(language)

        ambiguous = False
        for word, indexs in allWords.items():
            if len(indexs) <= 1:
                continue
            ambiguous = True
            for index, langs in indexs.items():
                for lang in langs:
                    # The language name goes in the %s slot and the index in
                    # %d; the reverse order raises TypeError on %d.
                    print("word %s in %s index %d" % (word, lang, index))

        assert not ambiguous, "words with differing indexes across languages"
예제 #5
0
 def mkPhrase(self, language, length):
     """Build a phrase of `length` words drawn from `language`'s word list.

     Each word is selected with self.rng.next(len(words)) and the chosen
     words are joined with the separator returned by the language's
     space() method.
     """
     source = Phrases.forLanguage(language)
     vocabulary = source.words

     picked = []
     for _ in range(length):
         picked.append(vocabulary[self.rng.next(len(vocabulary))])

     separator = source.space()
     return separator.join(picked)
예제 #6
0
 def _getPhrases(self):
     """Return the Phrases object for the language from self.getLanguage()."""
     language = self.getLanguage()
     return Phrases.forLanguage(language)
예제 #7
0
# `currentdir` is defined earlier in the script (outside this excerpt).
# The parent directory is put first on sys.path, presumably so that the
# `phrases` module imported below resolves from there -- TODO confirm.
parentdir = os.path.dirname(currentdir)

sys.path.insert(0, parentdir)

from phrases import Phrases

# Languages handled by the loop below, in this order.
languages = [
    "ab", "chinese_simplified", "chinese_traditional", "english", "french",
    "italian", "japanese", "korean", "spanish"
]

# Maps word -> index; the values are used below to index into `sequence`.
# NOTE(review): nothing in this excerpt adds entries to `reserved`;
# presumably that happens later in the loop body -- confirm.
reserved = dict()

for el1 in range(len(languages)):
    language = languages[el1]
    phrases = Phrases.forLanguage(language)
    # Slice of the word list, so entries can be replaced with None below
    # (assumes phrases.words is a list, making this a copy -- TODO confirm).
    words = phrases.words[:]
    # Same length as `words`; slots are filled by index below.
    sequence = [None for i in range(len(words))]
    # Set to True when at least one word of this language is in `reserved`.
    reuses = False
    # Set to True when a reserved word's index in this language differs
    # from its index in `reserved`.
    changed = False

    # put already reserved words at their index
    for i in range(len(words)):
        word = words[i]
        if word == None: continue
        if word in reserved:
            # Mark the word as handled in the working list.
            words[i] = None
            if phrases.invWords[word] != reserved[word]:
                changed = True
            sequence[reserved[word]] = word
            reuses = True