def parseTranslation(self, languageHeader, text):
    """Parse a single translation fragment into a translation object.

    ``text`` is searched with ``ENTranslationHandler.TRANSLATION``, which
    must expose the named groups ``prefix``, ``content`` and ``postfix``.
    Content that starts with ``{{`` is handed to ``self.parseTemplate``;
    any other content has its wiki links removed, is cleaned, and is
    wrapped in a ``WiktionaryTranslation`` for ``languageHeader``.

    The translation's additional information is assembled from the
    stripped prefix, the gender (re-wrapped as ``{{gender}}``) and the
    postfix. When ``self.currentSense`` is set and not blank, its stripped
    value is stored as the translation's raw sense.

    :param languageHeader: language used for non-template translations.
    :param text: the raw fragment to parse.
    :return: the populated translation, or ``None`` when the pattern does
        not match or ``self.parseTemplate`` yields ``None``.
    """
    matcher = re.search(ENTranslationHandler.TRANSLATION, text)
    if matcher is None:
        return None

    prefix = matcher.group("prefix")
    content = matcher.group("content")
    postfix = matcher.group("postfix")

    if content.startswith("{{"):
        translation = self.parseTemplate(content)
    else:
        translation = WiktionaryTranslation(
            languageHeader,
            StringUtils.cleanText(WikiString.removeWikiLinks(content)))
    if translation is None:
        return None

    additionalInformation = ""
    if prefix is not None:
        additionalInformation += prefix.strip()
    gender = translation.getGender()
    if gender is not None:
        additionalInformation += " {{" + gender + "}} "
    # A named group that did not take part in the match is reported as
    # None; guard it like the prefix so concatenation cannot raise
    # TypeError.
    if postfix is not None:
        additionalInformation += postfix
    translation.setAdditionalInformation(
        StringUtils.cleanText(additionalInformation.strip()))

    if self.currentSense is not None:
        rawSense = self.currentSense.strip()
        if len(rawSense) > 0:
            translation.setRawSense(rawSense)
    return translation
def processBody(self, text, context):
    """Consume one line of a translation section.

    Marker templates are handled first:

    * ``{{trans-mid}}`` / ``{{mid}}`` are accepted and ignored.
    * ``{{trans-top|...}}`` updates ``self.currentSense`` from the first
      numbered template parameter, when one is present.
    * ``{{top}}`` resets ``self.currentSense`` to the empty string.
    * ``{{trans-bottom}}`` / ``{{bottom}}``, any other template and any
      ``==`` heading are rejected.

    Any other line is matched against ``ENTranslationHandler.LANGUAGE``.
    The first group names the language, and the text after the match is
    split with ``self.splitTranslationParts``. Each part that
    ``self.parseTranslation`` accepts is appended to
    ``self.sensNum2trans`` under the current sense.

    :param text: the line to process; surrounding whitespace is ignored.
    :param context: not used by this method.
    :return: ``True`` if the line was consumed, ``False`` otherwise
        (presumably signalling the caller that the translation block
        has ended -- confirm against the caller).
    """
    line = text.strip()

    if line.startswith(("{{trans-mid}}", "{{mid}}")):
        return True

    if line.startswith("{{trans-top|") and "}}" in line:
        closing = line.index("}}")
        header = TemplateParser.parseTemplate(line[2:closing])
        if header is not None and header.getNumberedParamsCount() >= 1:
            self.currentSense = header.getNumberedParam(0)
        return True

    if line.startswith("{{top}}"):
        self.currentSense = ""
        return True

    # Closing template: the translation block ends here.
    if line.startswith(("{{trans-bottom}}", "{{bottom}}")):
        return False

    # Any other template or a heading means a new block has started.
    if line.startswith(("{{", "==")):
        return False

    found = re.search(ENTranslationHandler.LANGUAGE, line)
    if found is None:
        return False

    languageName = WikiString.removeWikiLinks(found.group(1).strip())
    language = Language.findByName(languageName)

    tailStart = found.end()
    if tailStart >= len(line):
        # Nothing follows the language label.
        return False

    for fragment in self.splitTranslationParts(line[tailStart:]):
        parsed = self.parseTranslation(language, fragment)
        if parsed is None:
            continue
        # File the translation under the sense that is currently open.
        self.sensNum2trans.setdefault(self.currentSense, list()).append(
            parsed)
    return True
def findMatchingSense(cls, entry, marker):
    """Pick the sense of ``entry`` whose gloss best matches ``marker``.

    An entry with exactly one sense returns that sense straight away,
    whatever the marker is. Otherwise every sense with a positive index
    is scored against the marker twice, with
    ``SimilarityUtils.wordSim`` and ``SimilarityUtils.textSim``, using the
    lower-cased gloss text with wiki links removed. The best sense of
    whichever metric reached the higher score is returned; on equal
    scores the ``textSim`` winner is used.

    :param entry: the entry whose senses are searched.
    :param marker: the sense marker (comment) to match.
    :return: the matching sense, or ``None`` when the marker is ``None``
        or empty, no sense was scored, or neither metric scored above
        zero.
    """
    if entry.getSenseCount() == 1:
        return entry.getSense(1)

    # Empty sense marker.
    if marker is None or len(marker) == 0:
        return None

    wordBest, wordBestScore = None, -1
    textBest, textBestScore = None, -1

    for candidate in entry.senses:
        # Senses without a positive index are unassigned; leave them out.
        if candidate.getIndex() > 0:
            glossText = candidate.getGloss().getText()
            gloss = WikiString.removeWikiLinks(glossText).lower()

            wordScore = SimilarityUtils.wordSim(marker, gloss)
            if wordScore > wordBestScore:
                wordBestScore, wordBest = wordScore, candidate

            textScore = SimilarityUtils.textSim(marker, gloss)
            if textScore > textBestScore:
                textBestScore, textBest = textScore, candidate

    nothingScored = wordBest is None and textBest is None
    noPositiveScore = wordBestScore <= 0 and textBestScore <= 0
    if nothingScored or noPositiveScore:
        return None

    return wordBest if wordBestScore > textBestScore else textBest
def parseTemplate(self, templateString):
    """Build a translation from a translation template string.

    The first two and last two characters of ``templateString`` are cut
    off (assumed to be the surrounding ``{{`` and ``}}``) and the rest is
    parsed with ``TemplateParser.parseTemplate``. Parameters are read as
    follows:

    * numbered parameter 0 -- language code, resolved through
      ``Language.findByCode``;
    * numbered parameter 1 -- translation text (wiki links removed, then
      cleaned);
    * numbered parameter 2 -- gender, unless it contains ``=``;
    * named parameter ``tr`` -- transliteration, cleaned before storing.

    The translation is flagged as needing a check when the template name
    contains ``check``.

    :param templateString: the template text including its delimiters.
    :return: the translation, or ``None`` when the template cannot be
        parsed, has fewer than two numbered parameters, or its cleaned
        translation text is empty.
    """
    inner = templateString[2:-2]
    template = TemplateParser.parseTemplate(inner)
    if template is None:
        return None
    if template.getNumberedParamsCount() <= 1:
        return None

    unlinked = WikiString.removeWikiLinks(template.getNumberedParam(1))
    translationText = StringUtils.cleanText(unlinked)
    if not translationText:
        return None

    code = template.getNumberedParam(0)
    rawTransliteration = template.getNamedParam("tr")
    result = WiktionaryTranslation(Language.findByCode(code),
                                   translationText)

    # A third numbered parameter containing "=" is not treated as gender.
    if template.getNumberedParamsCount() > 2:
        if "=" not in template.getNumberedParam(2):
            result.setGender(template.getNumberedParam(2))

    needsCheck = "check" in template.getName()
    result.setCheckNeeded(needsCheck)

    if rawTransliteration is not None:
        result.setTransliteration(StringUtils.cleanText(rawTransliteration))
    return result
def testremoveWikiLinks(self):
    """Check that removeWikiLinks reduces a wiki link to its label.

    Covers a piped link (``[[target|label]]``) and a plain link
    (``[[label]]``).
    """
    expectations = (
        ("[[leader|Leader]]", "Leader"),
        ("[[Leader]]", "Leader"),
    )
    for wikiText, plainText in expectations:
        self.assertEqual(plainText, WikiString.removeWikiLinks(wikiText))