def load_anki_data(kanji_list):
    """Build the set of known kanji from the deck and report inconsistencies.

    Kanji are gathered from two places: the non-suspended ``Kanji`` cards
    ("expected") and the characters of every non-suspended ``Counter`` /
    ``KanjiWord`` entry ("actual").  Three consistency reports are sent
    through ``message`` when the corresponding set is non-empty:

    * kanji used in words that have no kanji card of their own,
    * kanji cards that no word uses (no examples),
    * kanji that appear in neither ``kanji_list`` nor the extra dictionary
      loaded from ``settings.EXTRA_DICT_KANJI``.

    The kanji inside each report are sorted so the text is reproducible
    from one run to the next instead of following arbitrary set order.

    :param kanji_list: iterable of the kanji present in the dictionary.
    :returns: set holding every kanji found on a card or inside a word.
    """
    dictionary_kanji = set(kanji_list)

    # Kanji that have their own (non-suspended) card.
    expected = set()
    for card in Kanji.all():
        if not card.suspended:
            expected.add(card.kanji)

    # Kanji that appear inside a (non-suspended) counter or kanji word.
    actual = set()
    for word in Counter.all() + KanjiWord.all():
        if word.suspended:
            continue
        # NOTE(review): only kana are filtered out here, so any other
        # non-kanji character in a word would be kept -- confirm intended.
        actual.update(char for char in word.kanji if not kana.is_kana(char))

    # Must be a set for the union below to work.
    extra = load_extra(settings.EXTRA_DICT_KANJI)

    known = expected | actual

    # Kanji used in words that we have no card for.
    missing = actual - expected
    if missing:
        message("Missing Kanji Found", ' '.join(sorted(missing)))

    # Kanji cards without any example word.
    no_example = expected - actual
    if no_example:
        message("Kanji with no Examples", ' '.join(sorted(no_example)))

    # Kanji that are not in the dictionary (main list plus extras).
    unknown = known - (dictionary_kanji | extra)
    if unknown:
        message("Unknown Kanji, not in Dict:", ' '.join(sorted(unknown)))

    return known
# if kana.is_kana(reading['base']): # raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % ( # reading['base'], word.kanji, word.reading # )) # else: # raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % ( # reading['base'], word.kanji # )) # if reading['reading'] not in kanji.readings and kanji.kanji != '々': # print '%s(%s) word(%s)' % ( # kanji.kanji, # reading['reading'], # word.kanji, # ) for kanji in Kanji.all(): for reading in kanji._readings: possible = map(unicode, reading.get_all()) # See if we can find the reading in our kanji words list found = False for use in mapping[kanji.kanji]: if use in possible: found = True # Ignore any that are used if found: continue # See if its been ignored string = "(kanji: %s) %s" % (kanji.kanji, reading)
out = parser.parse_args(args) return out if __name__ == '__main__': force_UTF8() args = parse() # Find all the kanji that are in the deck all_kanji = set() for word in KanjiWord.all(): for kanji in word.kanji: all_kanji.add(kanji) for kanji in Kanji.all(): all_kanji.add(kanji) # Count which kanji the input data has data = Counter(unicode(sys.stdin.read())) for char, count in data.most_common(): # we don't want kana if kana.is_kana(char): del data[char] # Nor do we want kanji we know if char in all_kanji: del data[char] # Nor any non-kanji chars if not kana.is_kanji(char): del data[char]