Example #1
0
 def expression2dictmeaningssource(self, expression):
     """Look up meanings for ``expression``, trying each source in priority order.

     The sources are consulted lazily, and the first one whose lookup returns
     something other than ``None`` wins:

     1. the local dictionary (first element of ``self.dictionary.meanings(...)``);
     2. ``numbers.meaningfromnumberlike``, which interprets Hanzi as numbers;
     3. ``dictionaryonline.gTrans``, only if ``self.config.shouldusegoogletranslate``.

     Returns a ``(dictmeanings, dictmeaningssource)`` tuple, where
     ``dictmeaningssource`` is the attribution markup of the winning source
     (empty for the two local sources), or ``None`` if no source had an answer.
     """
     dictmeaningssources = [
         # Use CEDICT to get meanings
         (u"",
          lambda: self.dictionary.meanings(expression, self.config.prefersimptrad)[0]),
         # Interpret Hanzi as numbers. NB: only consult after CEDICT so that we
         # handle curious numbers such as 'liang' using the dictionary
         (u"",
          lambda: numbers.meaningfromnumberlike(expression, self.dictionary)),
     ]

     if self.config.shouldusegoogletranslate:
         # If the dictionary can't answer our question, ask Google Translate.
         # If there is a long word followed by another word then this will be treated as a phrase.
         # Phrases are also queried using googletranslate rather than the local dictionary.
         # This helps deal with small dictionaries (for example French)
         dictmeaningssources.append(
             (u'<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>',
              lambda: dictionaryonline.gTrans(expression, self.config.dictlanguage)))

     # Find the first source that returns a sensible meaning. Only None counts as
     # "no answer": an empty-but-present result is still returned to the caller.
     for dictmeaningssource, lookup in dictmeaningssources:
         dictmeanings = lookup()
         if dictmeanings is not None:
             return dictmeanings, dictmeaningssource

     # No information available
     return None
Example #2
0
 def expression2dictmeaningssource(self, expression):
     """Return the meanings of ``expression`` together with their attribution.

     Each candidate source is a ``(attribution, lookup)`` pair; lookups run one
     at a time, in order, and stop at the first result that is not ``None``.
     The local dictionary is asked first, then the number interpreter, and
     finally (only when ``self.config.shouldusegoogletranslate`` is truthy)
     Google Translate.

     Returns ``(dictmeanings, dictmeaningssource)`` for the first source that
     answers, or ``None`` when none of them does.
     """
     # Attribution markup returned alongside meanings that came from Google Translate
     googleattribution = u'<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>'

     dictmeaningssources = [
             # Use CEDICT to get meanings
             (u"",
              lambda: self.dictionary.meanings(expression, self.config.prefersimptrad)[0]),
             # Interpret Hanzi as numbers. NB: only consult after CEDICT so that we
             # handle curious numbers such as 'liang' using the dictionary
             (u"",
              lambda: numbers.meaningfromnumberlike(expression, self.dictionary))
         ] + ([
             # If the dictionary can't answer our question, ask Google Translate.
             # If there is a long word followed by another word then this will be treated as a phrase.
             # Phrases are also queried using googletranslate rather than the local dictionary.
             # This helps deal with small dictionaries (for example French)
             (googleattribution,
              lambda: dictionaryonline.gTrans(expression, self.config.dictlanguage))
         ] if self.config.shouldusegoogletranslate else [])

     # Find the first source that returns a sensible meaning (anything but None)
     for dictmeaningssource, lookup in dictmeaningssources:
         dictmeanings = lookup()
         if dictmeanings is not None:
             return dictmeanings, dictmeaningssource

     # No information available
     return None
Example #3
0
    def expression2simptrad(self, expression):
        """Convert ``expression`` into both simplified and traditional characters.

        Each conversion is requested from ``dictionaryonline.gTrans`` using the
        Google language code for that character system. Returns a dict with the
        keys ``"simp"`` and ``"trad"``; where no conversion came back, the value
        is the input expression unchanged.
        """
        conversions = {}
        charsystems = (("simp", "zh-CN"), ("trad", "zh-TW"))
        for charmode, glangcode in charsystems:
            log.info("Doing conversion of %s into %s characters", expression, charmode)

            # The reply comes back in the format: ["社會",[["noun","社會","社會","社會"]]]
            meanings = dictionaryonline.gTrans(expression, glangcode, False)

            if meanings is not None and len(meanings) != 0:
                # The conversion is stored in the first 'meaning'
                conversions[charmode] = model.flatten(meanings[0])
                continue

            # Nothing came back, so fall back to the input expression
            conversions[charmode] = expression

        return conversions
Example #4
0
    def expression2simptrad(self, expression):
        """Build the simplified and traditional renderings of ``expression``.

        Returns ``{"simp": ..., "trad": ...}``. Each value is the flattened first
        'meaning' returned by ``dictionaryonline.gTrans`` for the matching Google
        language code, or ``expression`` itself when the query yields ``None``
        or an empty result.
        """
        def convert(charmode, glangcode):
            # Query Google for the conversion, returned in the format: ["社會",[["noun","社會","社會","社會"]]]
            log.info("Doing conversion of %s into %s characters", expression, charmode)
            meanings = dictionaryonline.gTrans(expression, glangcode, False)

            noconversion = meanings is None or len(meanings) == 0
            # Give up and hand back the input when there is no conversion;
            # otherwise the conversion is stored in the first 'meaning'
            return expression if noconversion else model.flatten(meanings[0])

        result = {}
        for charmode, glangcode in [("simp", "zh-CN"), ("trad", "zh-TW")]:
            result[charmode] = convert(charmode, glangcode)
        return result
Example #5
0
    def generateincharactersystem(self, expression, charmode):
        """Convert ``expression`` into the character system named by ``charmode``.

        ``charmode`` of ``"simp"`` selects the ``zh-CN`` Google language code; any
        other value selects ``zh-TW``. Returns the flattened first 'meaning' from
        ``dictionaryonline.gTrans``, or ``expression`` unchanged when the query
        returns ``None`` or an empty result.
        """
        log.info("Doing conversion of %s into %s characters", expression, charmode)

        # Query Google for the conversion, returned in the format: ["社會",[["noun","社會","社會","社會"]]]
        glangcode = "zh-CN" if charmode == "simp" else "zh-TW"
        meanings = dictionaryonline.gTrans(expression, glangcode, False)

        # Identity test for None: equality could be hijacked by the result's own __eq__
        if meanings is None or len(meanings) == 0:
            # No conversion, so give up and return the input expression
            return expression

        # Conversion is stored in the first 'meaning'
        return model.flatten(meanings[0])
Example #6
0
    def generateincharactersystem(self, expression, charmode):
        """Return ``expression`` rendered in simplified or traditional characters.

        The conversion is requested from ``dictionaryonline.gTrans`` with the
        language code ``zh-CN`` when ``charmode == "simp"`` and ``zh-TW``
        otherwise. If nothing usable comes back (``None`` or an empty result),
        the input expression is returned as-is.
        """
        log.info("Doing conversion of %s into %s characters", expression,
                 charmode)

        # Query Google for the conversion, returned in the format: ["社會",[["noun","社會","社會","社會"]]]
        if charmode == "simp":
            glangcode = "zh-CN"
        else:
            glangcode = "zh-TW"
        meanings = dictionaryonline.gTrans(expression, glangcode, False)

        # Compare against None by identity rather than equality
        if meanings is None or len(meanings) == 0:
            # No conversion, so give up and return the input expression
            return expression
        else:
            # Conversion is stored in the first 'meaning'
            return model.flatten(meanings[0])
Example #7
0
    def updatefact(self, fact, expression):
        """Fill in, or blank out, the generated fields of ``fact`` for ``expression``.

        When ``expression`` is ``None`` or only whitespace, the generated fields
        present in ``fact`` are blanked and the method returns immediately.
        Otherwise every field that is present in ``fact``, enabled through
        ``updatecontrolflags`` / ``self.config.settings`` and currently empty
        (with the exceptions noted in the loop below) is filled from its updater.

        ``fact`` is mutated in place; the return value is always ``None``.
        """
        # AutoBlanking Feature - If there is no expression, zeros relevant fields
        # DEBUG - add feature to store the text when a lookup is performed. When new text is entered then allow auto-blank any field that has not been edited
        if expression is None or expression.strip() == u"":
            for key in ["reading", "meaning", "color", "trad", "simp", "weblinks"]:
                if key in fact:
                    fact[key] = u""
            
            # DEBUG Me - Auto generated pinyin should be at least "[sound:" + ".xxx]" (12 characters) plus pinyin (max 6). i.e. 18
            # DEBUG - Split string around "][" to get the audio of each sound in an array. Blank the field unless any one string is longer than 20 characters
            # Exploit the fact that pinyin text-to-speech pinyin should be no longer than 18 characters to guess that anything longer is user generated
            # MaxB comment: I don't think that will work, because we import the Chinese-Lessons.com Mandarin Sounds into anki and it gives them /long/ names.
            # Instead, how about we check if all of the audio files referenced are files in the format pinyin<tone>.mp3?
            if 'audio' in fact and len(fact['audio']) < 40:
                fact['audio'] = u""
            
            # For now this is a compromise in safety and function.
            # longest MW should be: "? - zhangì (9 char)
            # shortest possible is "? - ge" 6 char so we will autoblank if less than 12 letters
            # this means blanking will occur if one measure word is there but not if two (so if user added any they are safe)
            if 'mw' in fact and len(fact['mw']) < 12: 
                fact['mw'] = u""
            
            # TODO: Nick added this to give up after auto-blanking. He claims it removes a minor
            # delay, but I'm not sure where the delay originates from, which worries me:
            return
        
        # Apply tone sandhi: this information is needed both by the sound generation
        # and the colorisation, so we can't do it in generatereading
        dictreading = self.getdictreading(expression)
        dictreadingsandhi = transformations.tonesandhi(dictreading)
  
        # Defaults for when the meanings are not preloaded: without them the 'meaning',
        # 'mw' and 'mwaudio' updaters below would fail on an unbound name. None is a value
        # those updaters already receive (the number and Google Translate lookups
        # report None measure words, and a None meaning is simply not written).
        meaning = None
        dictmeasurewords = None

        # Preload the meaning, but only if we absolutely must
        if self.config.needmeanings:
            dictmeaningssources = [
                    # Use CEDICT to get meanings
                    (None,
                     lambda: self.dictionary.meanings(expression, self.config.prefersimptrad)),
                    # Interpret Hanzi as numbers. NB: only consult after CEDICT so that we
                    # handle curious numbers such as 'liang' using the dictionary
                    (None,
                     lambda: (numberutils.meaningfromnumberlike(expression, self.dictionary), None))
                ] + (self.config.shouldusegoogletranslate and [
                    # If the dictionary can't answer our question, ask Google Translate.
                    # If there is a long word followed by another word then this will be treated as a phrase.
                    # Phrases are also queried using googletranslate rather than the local dictionary.
                    # This helps deal with small dictionaries (for example French)
                    ('<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>',
                     lambda: (dictionaryonline.gTrans(expression, self.config.dictlanguage), None))
                ] or [])
            
            # Find the first source that returns a sensible meaning. If none does, the loop
            # variables keep the values from the last source that was tried.
            for dictmeaningssource, lookup in dictmeaningssources:
                dictmeanings, dictmeasurewords = lookup()
                if dictmeanings is not None or dictmeasurewords is not None:
                    break
            
            # If the user wants the measure words to be folded into the definition or there
            # is no MW field for us to split them out into, fold them in there
            if not(self.config.detectmeasurewords) or "mw" not in fact:
                # NB: do NOT overwrite the old dictmeasurewords, because we still want to use the
                # measure words for e.g. measure word audio generation
                dictmeanings = dictionary.combinemeaningsmws(dictmeanings, dictmeasurewords)
            
            # NB: expression only used for Hanzi masking here
            meaning = self.generatemeanings(expression, dictmeanings)
            if meaning and dictmeaningssource:
                # Append attribution to the meaning if we have any
                meaning = meaning + dictmeaningssource

        # Generate translations of the expression into simplified/traditional on-demand
        expressionviews = utils.FactoryDict(lambda simptrad: self.generateincharactersystem(expression, simptrad))
        
        # Update the expression if the option is turned on and the preference simp/trad is different to expression (i.e. needs correcting)
        expressionupdated = False
        if self.config.forceexpressiontobesimptrad and (expression != expressionviews[self.config.prefersimptrad]):
            expression = expressionviews[self.config.prefersimptrad]
            expressionupdated = True

        # Do the updates on the fields the user has requested:
        # NB: when adding an updater to this list, make sure that you have
        # added it to the updatecontrolflags dictionary in Config as well!
        #
        # NB: the "x and y or None" forms are deliberate: any falsy y (not just a
        # failed condition) collapses to None, which means "do not update".
        updaters = {
                'expression' : lambda: expression,
                'reading'    : lambda: self.generatereading(dictreadingsandhi),
                'meaning'    : lambda: meaning,
                'mw'         : lambda: self.generatemeasureword(self.config.detectmeasurewords and dictmeasurewords or None),
                'audio'      : lambda: self.generateaudio(dictreadingsandhi),
                'mwaudio'    : lambda: self.generatemwaudio(dictreading, dictmeasurewords),
                'color'      : lambda: self.generatecoloredcharacters(expression),
                'trad'       : lambda: (expressionviews["trad"] != expressionviews["simp"]) and expressionviews["trad"] or None,
                'simp'       : lambda: (expressionviews["trad"] != expressionviews["simp"]) and expressionviews["simp"] or None,
                'weblinks'   : lambda: self.weblinkgeneration(expression)
            }

        # Loop through each field, deciding whether to update it or not
        for key, updater in updaters.items():
            # A hint for reading this method: read the stuff inside the if not(...):
            # as an assertion that has to be valid before we can proceed with the update.
            
            # If this option has been disabled or the field isn't present then jump to the next update.
            # Expression is always updated because some parts of the code call updatefact with an expression
            # that is not yet set on the fact, and we need to make sure that it arrives. This is OK, because
            # we only actually modify a directly user-entered expression when forceexpressiontobesimptrad is on.
            #
            # NB: please do NOT do this if key isn't in updatecontrolflags, because that
            # indicates an error with the Toolkit that I'd like to get an exception for!
            if not(key in fact and (key == "expression" or updatecontrolflags[key] is None or self.config.settings[updatecontrolflags[key]])):
                continue
            
            # If the field is not empty already then skip (so we don't overwrite it), unless:
            # a) this is the expression field, which should always be over-written with simp/trad
            # b) this is the weblinks field, which must always be up to date
            # c) this is the color field and we have just forced the expression to change,
            #    in which case we'd like to overwrite the colored characters regardless
            if not(fact[key].strip() == u"" or key in ["expression", "weblinks"] or (key == "color" and expressionupdated)):
                continue
            
            # Fill the field with the new value, but only if we have one and it is necessary to do so
            value = updater()
            if value is not None and value != fact[key]:
                fact[key] = value
Example #8
0
    def updatefact(self, fact, expression):
        """Update the generated fields of ``fact`` so that they match ``expression``.

        A missing or whitespace-only ``expression`` triggers auto-blanking: the
        generated fields found in ``fact`` are emptied and nothing else is done.
        For any other expression, each updater whose field exists in ``fact``
        and is enabled via ``updatecontrolflags`` / ``self.config.settings`` is
        run, normally only if the field is currently empty (see the exceptions
        documented inside the loop).

        ``fact`` is modified in place and ``None`` is returned in every case.
        """
        # AutoBlanking Feature - If there is no expression, zeros relevant fields
        # DEBUG - add feature to store the text when a lookup is performed. When new text is entered then allow auto-blank any field that has not been edited
        if expression is None or expression.strip() == u"":
            for key in [
                    "reading", "meaning", "color", "trad", "simp", "weblinks"
            ]:
                if key in fact:
                    fact[key] = u""

            # DEBUG Me - Auto generated pinyin should be at least "[sound:" + ".xxx]" (12 characters) plus pinyin (max 6). i.e. 18
            # DEBUG - Split string around "][" to get the audio of each sound in an array. Blank the field unless any one string is longer than 20 characters
            # Exploit the fact that pinyin text-to-speech pinyin should be no longer than 18 characters to guess that anything longer is user generated
            # MaxB comment: I don't think that will work, because we import the Chinese-Lessons.com Mandarin Sounds into anki and it gives them /long/ names.
            # Instead, how about we check if all of the audio files referenced are files in the format pinyin<tone>.mp3?
            if 'audio' in fact and len(fact['audio']) < 40:
                fact['audio'] = u""

            # For now this is a compromise in safety and function.
            # longest MW should be: "? - zhangì (9 char)
            # shortest possible is "? - ge" 6 char so we will autoblank if less than 12 letters
            # this means blanking will occur if one measure word is there but not if two (so if user added any they are safe)
            if 'mw' in fact and len(fact['mw']) < 12:
                fact['mw'] = u""

            # TODO: Nick added this to give up after auto-blanking. He claims it removes a minor
            # delay, but I'm not sure where the delay originates from, which worries me:
            return

        # Apply tone sandhi: this information is needed both by the sound generation
        # and the colorisation, so we can't do it in generatereading
        dictreading = self.getdictreading(expression)
        dictreadingsandhi = transformations.tonesandhi(dictreading)

        # Bind these up front: the 'meaning', 'mw' and 'mwaudio' updaters read them even
        # when the preload below is skipped, and would otherwise raise on an unbound name.
        # None is already a possible value on the preload path (the number and Google
        # Translate lookups yield None measure words; a None meaning is never written).
        meaning = None
        dictmeasurewords = None

        # Preload the meaning, but only if we absolutely must
        if self.config.needmeanings:
            dictmeaningssources = [
                # Use CEDICT to get meanings
                (None, lambda: self.dictionary.meanings(
                    expression, self.config.prefersimptrad)),
                # Interpret Hanzi as numbers. NB: only consult after CEDICT so that we
                # handle curious numbers such as 'liang' using the dictionary
                (None, lambda: (numberutils.meaningfromnumberlike(
                    expression, self.dictionary), None))
            ] + (
                self.config.shouldusegoogletranslate and [
                    # If the dictionary can't answer our question, ask Google Translate.
                    # If there is a long word followed by another word then this will be treated as a phrase.
                    # Phrases are also queried using googletranslate rather than the local dictionary.
                    # This helps deal with small dictionaries (for example French)
                    ('<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>',
                     lambda: (dictionaryonline.gTrans(expression, self.config.
                                                      dictlanguage), None))
                ] or [])

            # Find the first source that returns a sensible meaning. When every source
            # comes up empty, the values from the last source tried are what remain.
            for dictmeaningssource, lookup in dictmeaningssources:
                dictmeanings, dictmeasurewords = lookup()
                if dictmeanings is not None or dictmeasurewords is not None:
                    break

            # If the user wants the measure words to be folded into the definition or there
            # is no MW field for us to split them out into, fold them in there
            if not (self.config.detectmeasurewords) or "mw" not in fact:
                # NB: do NOT overwrite the old dictmeasurewords, because we still want to use the
                # measure words for e.g. measure word audio generation
                dictmeanings = dictionary.combinemeaningsmws(
                    dictmeanings, dictmeasurewords)

            # NB: expression only used for Hanzi masking here
            meaning = self.generatemeanings(expression, dictmeanings)
            if meaning and dictmeaningssource:
                # Append attribution to the meaning if we have any
                meaning = meaning + dictmeaningssource

        # Generate translations of the expression into simplified/traditional on-demand
        expressionviews = utils.FactoryDict(
            lambda simptrad: self.generateincharactersystem(
                expression, simptrad))

        # Update the expression if the option is turned on and the preference simp/trad is different to expression (i.e. needs correcting)
        expressionupdated = False
        if self.config.forceexpressiontobesimptrad and (
                expression != expressionviews[self.config.prefersimptrad]):
            expression = expressionviews[self.config.prefersimptrad]
            expressionupdated = True

        # Do the updates on the fields the user has requested:
        # NB: when adding an updater to this list, make sure that you have
        # added it to the updatecontrolflags dictionary in Config as well!
        #
        # NB: the "x and y or None" forms are kept on purpose: a falsy y also
        # becomes None, and None means "leave the field alone".
        updaters = {
            'expression':
            lambda: expression,
            'reading':
            lambda: self.generatereading(dictreadingsandhi),
            'meaning':
            lambda: meaning,
            'mw':
            lambda: self.generatemeasureword(self.config.detectmeasurewords and
                                             dictmeasurewords or None),
            'audio':
            lambda: self.generateaudio(dictreadingsandhi),
            'mwaudio':
            lambda: self.generatemwaudio(dictreading, dictmeasurewords),
            'color':
            lambda: self.generatecoloredcharacters(expression),
            'trad':
            lambda: (expressionviews["trad"] != expressionviews["simp"]
                     ) and expressionviews["trad"] or None,
            'simp':
            lambda: (expressionviews["trad"] != expressionviews["simp"]) and
            expressionviews["simp"] or None,
            'weblinks':
            lambda: self.weblinkgeneration(expression)
        }

        # Loop through each field, deciding whether to update it or not
        for key, updater in updaters.items():
            # A hint for reading this method: read the stuff inside the if not(...):
            # as an assertion that has to be valid before we can proceed with the update.

            # If this option has been disabled or the field isn't present then jump to the next update.
            # Expression is always updated because some parts of the code call updatefact with an expression
            # that is not yet set on the fact, and we need to make sure that it arrives. This is OK, because
            # we only actually modify a directly user-entered expression when forceexpressiontobesimptrad is on.
            #
            # NB: please do NOT do this if key isn't in updatecontrolflags, because that
            # indicates an error with the Toolkit that I'd like to get an exception for!
            if not (key in fact and
                    (key == "expression" or updatecontrolflags[key] is None
                     or self.config.settings[updatecontrolflags[key]])):
                continue

            # If the field is not empty already then skip (so we don't overwrite it), unless:
            # a) this is the expression field, which should always be over-written with simp/trad
            # b) this is the weblinks field, which must always be up to date
            # c) this is the color field and we have just forced the expression to change,
            #    in which case we'd like to overwrite the colored characters regardless
            if not (fact[key].strip() == u""
                    or key in ["expression", "weblinks"] or
                    (key == "color" and expressionupdated)):
                continue

            # Fill the field with the new value, but only if we have one and it is necessary to do so
            value = updater()
            if value is not None and value != fact[key]:
                fact[key] = value