コード例 #1
0
    def parseTranslation(self, languageHeader, text):
        """Parse one translation fragment into a translation object.

        ``text`` is matched against ``ENTranslationHandler.TRANSLATION``, which
        supplies the named groups ``prefix``, ``content`` and ``postfix``.
        Content starting with ``{{`` is delegated to ``self.parseTemplate``;
        any other content has its wiki links removed, is cleaned, and is
        wrapped in a ``WiktionaryTranslation`` for ``languageHeader``.

        The stripped prefix, the gender (re-wrapped as ``{{gender}}``) and the
        postfix are concatenated, cleaned and stored as the translation's
        additional information. A non-blank ``self.currentSense`` is stored
        as the translation's raw sense.

        :param languageHeader: language handed to ``WiktionaryTranslation``
            when the content is not a template.
        :param text: raw text of a single translation part.
        :return: the populated translation, or ``None`` when ``text`` does not
            match the pattern or ``self.parseTemplate`` returns ``None``.
        """
        matcher = re.search(ENTranslationHandler.TRANSLATION, text)
        if matcher is None:
            return None

        prefix = matcher.group("prefix")
        content = matcher.group("content")
        postfix = matcher.group("postfix")

        if content.startswith("{{"):
            translation = self.parseTemplate(content)
        else:
            translation = WiktionaryTranslation(
                languageHeader,
                StringUtils.cleanText(WikiString.removeWikiLinks(content)))

        if translation is None:
            return None

        additionalInformation = ""
        if prefix is not None:
            additionalInformation += prefix.strip()

        gender = translation.getGender()
        if gender is not None:
            additionalInformation += " {{" + gender + "}} "

        # A group that took no part in the match is reported as None, and
        # ``str += None`` raises TypeError; treat it like the prefix above.
        if postfix is not None:
            additionalInformation += postfix

        translation.setAdditionalInformation(
            StringUtils.cleanText(additionalInformation.strip()))

        # Only record a sense that contains something besides whitespace.
        if self.currentSense is not None:
            rawSense = self.currentSense.strip()
            if rawSense:
                translation.setRawSense(rawSense)

        return translation
コード例 #2
0
    def processBody(self, text, context):
        """Process one line of a translation section.

        Marker templates are checked first: ``{{trans-mid}}`` / ``{{mid}}``
        are skipped, ``{{trans-top|...}}`` sets ``self.currentSense`` to the
        template's first numbered parameter (when it has one) and ``{{top}}``
        resets it to the empty string. Any other line must match
        ``ENTranslationHandler.LANGUAGE``; the text after the match is split
        with ``self.splitTranslationParts`` and every part that
        ``self.parseTranslation`` can parse is appended to
        ``self.sensNum2trans`` under the current sense.

        :param text: the line to process; surrounding whitespace is ignored.
        :param context: not used by this method.
        :return: ``True`` for the marker templates above and for language
            lines with trailing text; ``False`` for closing templates, any
            other template or heading, lines that do not match the language
            pattern, and language lines with nothing after the match.
        """
        line = text.strip()

        # Column separators carry no information.
        if line.startswith(("{{trans-mid}}", "{{mid}}")):
            return True

        if line.startswith("{{trans-top|") and "}}" in line:
            header = TemplateParser.parseTemplate(line[2:line.find("}}")])
            if header is not None and header.getNumberedParamsCount() >= 1:
                self.currentSense = header.getNumberedParam(0)
            return True

        if line.startswith("{{top}}"):
            self.currentSense = ""
            return True

        # The closing templates end the translation block; any other
        # template or a heading means a new block has just started.
        if line.startswith(("{{trans-bottom}}", "{{bottom}}", "{{", "==")):
            return False

        languageMatch = re.search(ENTranslationHandler.LANGUAGE, line)
        if languageMatch is None:
            return False

        languageName = WikiString.removeWikiLinks(
            languageMatch.group(1).strip())
        language = Language.findByName(languageName)

        restStart = languageMatch.end()
        if restStart >= len(line):
            # Nothing follows the language name.
            return False

        for part in self.splitTranslationParts(line[restStart:]):
            translation = self.parseTranslation(language, part)
            if translation is None:
                continue

            # Save the translation under the sense currently in effect.
            if self.currentSense not in self.sensNum2trans:
                self.sensNum2trans[self.currentSense] = []
            self.sensNum2trans[self.currentSense].append(translation)

        return True
コード例 #3
0
    def findMatchingSense(cls, entry, marker):
        """Pick the sense of ``entry`` that best matches a sense marker.

        An entry with exactly one sense returns that sense without looking
        at the marker. Otherwise every sense with a positive index is scored
        against ``marker`` twice, with ``SimilarityUtils.wordSim`` and with
        ``SimilarityUtils.textSim``, using the lower-cased gloss text with
        wiki links removed. The sense behind the higher of the two best
        scores is returned; on a tie the ``textSim`` winner is used.

        :param entry: the entry whose senses are searched.
        :param marker: the sense marker (comment) to match against.
        :return: the best matching sense, or ``None`` when the marker is
            ``None`` or empty, or when neither best score is above zero.
        """
        if entry.getSenseCount() == 1:
            return entry.getSense(1)

        # Empty sense marker.
        if marker is None or len(marker) == 0:
            return None

        wordBest, wordBestScore = None, -1
        textBest, textBestScore = None, -1

        for candidate in entry.senses:
            if candidate.getIndex() <= 0:
                continue  # Skip unassigned sense.

            rawGloss = candidate.getGloss().getText()
            gloss = WikiString.removeWikiLinks(rawGloss).lower()

            wordScore = SimilarityUtils.wordSim(marker, gloss)
            if wordScore > wordBestScore:
                wordBest, wordBestScore = candidate, wordScore

            textScore = SimilarityUtils.textSim(marker, gloss)
            if textScore > textBestScore:
                textBest, textBestScore = candidate, textScore

        if wordBest is None and textBest is None:
            return None

        if wordBestScore <= 0 and textBestScore <= 0:
            return None

        return wordBest if wordBestScore > textBestScore else textBest
コード例 #4
0
    def parseTemplate(self, templateString):
        """Build a translation from a translation template string.

        The first two and last two characters of ``templateString`` are
        dropped (presumably the surrounding ``{{`` and ``}}``) before the
        rest is parsed with ``TemplateParser.parseTemplate``. Numbered
        parameter 0 is used as the language code and parameter 1 as the
        translation text. Parameter 2, when present and free of ``=``, is
        stored as the gender, and the named parameter ``tr`` as the
        transliteration. The translation is flagged as needing a check when
        the template name contains ``check``.

        :param templateString: the template text including its delimiters.
        :return: the translation, or ``None`` when the template cannot be
            parsed, has fewer than two numbered parameters, or its
            translation text is empty after cleaning.
        """
        template = TemplateParser.parseTemplate(templateString[2:-2])
        if template is None:
            return None
        if template.getNumberedParamsCount() <= 1:
            return None

        rawText = template.getNumberedParam(1)
        translationText = StringUtils.cleanText(
            WikiString.removeWikiLinks(rawText))
        if not translationText:
            return None

        languageCode = template.getNumberedParam(0)
        transliteration = template.getNamedParam("tr")
        language = Language.findByCode(languageCode)
        result = WiktionaryTranslation(language, translationText)

        # A third positional value containing "=" is not taken as a gender.
        if template.getNumberedParamsCount() > 2:
            genderParam = template.getNumberedParam(2)
            if "=" not in genderParam:
                result.setGender(genderParam)

        result.setCheckNeeded("check" in template.getName())

        if transliteration is not None:
            result.setTransliteration(StringUtils.cleanText(transliteration))

        return result
コード例 #5
0
 def testremoveWikiLinks(self):
     """Check that ``removeWikiLinks`` reduces a wiki link to its text."""
     linkMarkups = (
         "[[leader|Leader]]",  # piped link: the text after the pipe is kept
         "[[Leader]]",  # plain link: the target itself is kept
     )
     for markup in linkMarkups:
         self.assertEqual("Leader", WikiString.removeWikiLinks(markup))