def dictmeaningsmws2mergeddictmeaningsmws(self, dictmeanings, dictmws, mwfieldinfact):
    """Decide whether measure words stay separate or get merged into the meanings.

    Returns a ``(meanings, measurewords)`` pair. The inputs are handed back
    untouched when there are no measure words, or when measure word detection
    is enabled and the fact has an MW field to put them in. Otherwise the
    measure words are combined into the meanings and an empty list is
    returned in their place.
    """
    # Nothing to merge: hand both values back exactly as they were given
    if not dictmws:
        return dictmeanings, dictmws

    # The measure words can only live on their own if the user asked for them
    # to be detected AND there is a field on the fact to receive them
    keepseparate = self.config.detectmeasurewords and mwfieldinfact
    if keepseparate:
        return dictmeanings, dictmws

    merged = dictionary.combinemeaningsmws(dictmeanings, dictmws)
    return merged, []
def updatefact(self, fact, expression):
    """Fill in the automatically generated fields of ``fact`` for ``expression``.

    ``fact`` is used as a mapping from field name to text and is modified in
    place; nothing is returned.

    * If ``expression`` is None or blank, the generated fields are blanked
      (audio and mw only when they are short enough to look auto-generated)
      and no lookups are performed.
    * Otherwise each field that is present in ``fact`` and enabled through
      ``updatecontrolflags`` / ``self.config.settings`` is filled in, but only
      if it is currently empty. The expression and weblinks fields are always
      refreshed, and the color field is refreshed when the expression has just
      been forced into the preferred character system.

    A key of ``updaters`` that is missing from ``updatecontrolflags`` raises
    KeyError on purpose (see the note in the update loop).
    """
    # AutoBlanking Feature - If there is no expression, zeros relevant fields
    # DEBUG - add feature to store the text when a lookup is performed. When new text is entered
    # then allow auto-blank any field that has not been edited
    if expression is None or expression.strip() == u"":
        for key in ["reading", "meaning", "color", "trad", "simp", "weblinks"]:
            if key in fact:
                fact[key] = u""

        # DEBUG Me - Auto generated pinyin should be at least "[sound:" + ".xxx]" (12 characters)
        # plus pinyin (max 6). i.e. 18
        # DEBUG - Split string around "][" to get the audio of each sound in an array. Blank the
        # field unless any one string is longer than 20 characters
        # Exploit the fact that pinyin text-to-speech pinyin should be no longer than 18 characters
        # to guess that anything longer is user generated
        # MaxB comment: I don't think that will work, because we import the Chinese-Lessons.com
        # Mandarin Sounds into anki and it gives them /long/ names.
        # Instead, how about we check if all of the audio files referenced are files in the
        # format pinyin<tone>.mp3?
        if 'audio' in fact and len(fact['audio']) < 40:
            fact['audio'] = u""

        # For now this is a compromise in safety and function.
        # The longest single MW entry should be about 9 characters and the shortest about 6,
        # so we autoblank if there are fewer than 12 characters.
        # This means blanking will occur if one measure word is there but not if two
        # (so if the user added any they are safe)
        if 'mw' in fact and len(fact['mw']) < 12:
            fact['mw'] = u""

        # TODO: Nick added this to give up after auto-blanking. He claims it removes a minor
        # delay, but I'm not sure where the delay originates from, which worries me:
        return

    # Apply tone sandhi: this information is needed both by the sound generation
    # and the colorisation, so we can't do it in generatereading
    dictreading = self.getdictreading(expression)
    dictreadingsandhi = transformations.tonesandhi(dictreading)

    # The updaters below read these lazily, so they must be bound even when the
    # meaning lookup is skipped (otherwise an enabled meaning/mw/mwaudio field
    # would fail with a NameError instead of simply being left alone)
    meaning = None
    dictmeasurewords = None

    # Preload the meaning, but only if we absolutely must
    if self.config.needmeanings:
        dictmeaningssources = [
            # Use CEDICT to get meanings
            (None,
             lambda: self.dictionary.meanings(expression, self.config.prefersimptrad)),
            # Interpret Hanzi as numbers. NB: only consult after CEDICT so that we
            # handle curious numbers such as 'liang' using the dictionary
            (None,
             lambda: (numberutils.meaningfromnumberlike(expression, self.dictionary), None)),
        ]
        if self.config.shouldusegoogletranslate:
            # If the dictionary can't answer our question, ask Google Translate.
            # If there is a long word followed by another word then this will be treated as a phrase.
            # Phrases are also queried using googletranslate rather than the local dictionary.
            # This helps deal with small dictionaries (for example French)
            dictmeaningssources.append(
                ('<br /><span style="color:gray"><small>[Google Translate]</small></span><span> </span>',
                 lambda: (dictionaryonline.gTrans(expression, self.config.dictlanguage), None)))

        # Find the first source that returns a sensible meaning
        for dictmeaningssource, lookup in dictmeaningssources:
            dictmeanings, dictmeasurewords = lookup()
            if dictmeanings is not None or dictmeasurewords is not None:
                break

        # If the user wants the measure words to be folded into the definition or there
        # is no MW field for us to split them out into, fold them in there
        if not self.config.detectmeasurewords or "mw" not in fact:
            # NB: do NOT overwrite the old dictmeasurewords, because we still want to use the
            # measure words for e.g. measure word audio generation
            dictmeanings = dictionary.combinemeaningsmws(dictmeanings, dictmeasurewords)

        # NB: expression only used for Hanzi masking here
        meaning = self.generatemeanings(expression, dictmeanings)
        if meaning and dictmeaningssource:
            # Append attribution to the meaning if we have any
            meaning = meaning + dictmeaningssource

    # Generate translations of the expression into simplified/traditional on-demand
    expressionviews = utils.FactoryDict(
        lambda simptrad: self.generateincharactersystem(expression, simptrad))

    # Update the expression if the option is turned on and the preferred simp/trad form is
    # different to the expression (i.e. it needs correcting)
    expressionupdated = False
    if self.config.forceexpressiontobesimptrad and (
            expression != expressionviews[self.config.prefersimptrad]):
        expression = expressionviews[self.config.prefersimptrad]
        expressionupdated = True

    def variantifdifferent(simptrad):
        # The trad/simp fields are only worth filling when the two forms differ;
        # an empty view counts as "no value"
        if expressionviews["trad"] != expressionviews["simp"]:
            return expressionviews[simptrad] or None
        return None

    # Do the updates on the fields the user has requested:
    # NB: when adding an updater to this list, make sure that you have
    # added it to the updatecontrolflags dictionary in Config as well!
    updaters = {
        'expression': lambda: expression,
        'reading': lambda: self.generatereading(dictreadingsandhi),
        'meaning': lambda: meaning,
        'mw': lambda: self.generatemeasureword(
            (dictmeasurewords or None) if self.config.detectmeasurewords else None),
        'audio': lambda: self.generateaudio(dictreadingsandhi),
        'mwaudio': lambda: self.generatemwaudio(dictreading, dictmeasurewords),
        'color': lambda: self.generatecoloredcharacters(expression),
        'trad': lambda: variantifdifferent("trad"),
        'simp': lambda: variantifdifferent("simp"),
        'weblinks': lambda: self.weblinkgeneration(expression),
    }

    # Loop through each field, deciding whether to update it or not
    for key, updater in updaters.items():
        # If the field isn't present then jump to the next update.
        if key not in fact:
            continue

        # If this option has been disabled then jump to the next update.
        # Expression is always updated because some parts of the code call updatefact with an
        # expression that is not yet set on the fact, and we need to make sure that it arrives.
        # This is OK, because we only actually modify a directly user-entered expression when
        # forceexpressiontobesimptrad is on.
        #
        # NB: please do NOT guard against key missing from updatecontrolflags, because that
        # indicates an error with the Toolkit that I'd like to get an exception for!
        enabled = (key == "expression"
                   or updatecontrolflags[key] is None
                   or self.config.settings[updatecontrolflags[key]])
        if not enabled:
            continue

        # If the field is not empty already then skip (so we don't overwrite it), unless:
        #  a) this is the expression field, which should always be over-written with simp/trad
        #  b) this is the weblinks field, which must always be up to date
        #  c) this is the color field and we have just forced the expression to change,
        #     in which case we'd like to overwrite the colored characters regardless
        overwritable = (fact[key].strip() == u""
                        or key in ["expression", "weblinks"]
                        or (key == "color" and expressionupdated))
        if not overwritable:
            continue

        # Fill the field with the new value, but only if we have one and it is necessary to do so
        value = updater()
        if value is not None and value != fact[key]:
            fact[key] = value