Ejemplo n.º 1
0
def main(args=None):
    """Print a random selection of kanji words that match the given tags.

    Args:
        args: Optional argument list handed straight to ``parse``.

    The parsed arguments must provide ``tags`` (used to build the word
    filter) and ``count`` (how many words to print).  When fewer words match
    than were requested, every matching word is printed instead of letting
    ``random.sample`` raise ``ValueError``.
    """
    args = parse(args)

    word_filter = create_filter(args.tags)

    # Build a real list: random.sample needs a sized sequence
    words = [word for word in KanjiWord.all() if word_filter(word.tags)]

    # Never ask for more words than are actually available
    sample_size = min(args.count, len(words))
    for word in random.sample(words, sample_size):
        print(word.kanji)
Ejemplo n.º 2
0
def main(args=None):
    """Print a random selection of kanji words that match the given tags.

    Args:
        args: Optional argument list handed straight to ``parse``.

    The parsed arguments must provide ``tags`` (used to build the word
    filter) and ``count`` (how many words to print).  When fewer words match
    than were requested, every matching word is printed instead of letting
    ``random.sample`` raise ``ValueError``.
    """
    args = parse(args)

    word_filter = create_filter(args.tags)

    # Build a real list: random.sample needs a sized sequence
    words = [word for word in KanjiWord.all() if word_filter(word.tags)]

    # Never ask for more words than are actually available
    sample_size = min(args.count, len(words))
    for word in random.sample(words, sample_size):
        print(word.kanji)
Ejemplo n.º 3
0
def load_anki_data(kanji_list):
    """Build the set of known kanji and report inconsistencies in the deck.

    Args:
        kanji_list: Iterable of the kanji present in the dictionary.

    Returns:
        The set of every kanji that either has its own non-suspended card or
        appears in a non-suspended counter / kanji word.

    Three conditions are reported through ``message``:
      * kanji used in words that have no card of their own,
      * kanji cards that no word uses as an example,
      * kanji found in neither ``kanji_list`` nor the extra dictionary.
    The reported kanji are sorted so the output does not depend on set
    iteration order.
    """
    kanji_list = set(kanji_list)

    # Find out which kanji we actually have cards for
    expected = {kanji.kanji for kanji in Kanji.all() if not kanji.suspended}

    # Kanji words also get to add to the whitelist
    actual = set()
    for word in Counter.all() + KanjiWord.all():
        if word.suspended:
            continue

        # Add all the kanji in the word, skipping any kana
        actual.update(
            char for char in word.kanji if not kana.is_kana(char)
        )

    extra = load_extra(settings.EXTRA_DICT_KANJI)

    # Our known kanji list is everything seen on a card or inside a word
    known = expected | actual

    # Find which kanji we have no cards for
    missing = actual - expected
    if missing:
        message("Missing Kanji Found", ' '.join(sorted(missing)))

    # Notify the user of any kanji that don't have examples (no kanji-words)
    no_example = expected - actual
    if no_example:
        message("Kanji with no Examples", ' '.join(sorted(no_example)))

    # Notify the user of any kanji that aren't in our dictionary
    unknown = known - (kanji_list | extra)
    if unknown:
        message("Unknown Kanji, not in Dict:", ' '.join(sorted(unknown)))

    return known
Ejemplo n.º 4
0
def load_anki_data(kanji_list):
    """Build the set of known kanji and report inconsistencies in the deck.

    Args:
        kanji_list: Iterable of the kanji present in the dictionary.

    Returns:
        The set of every kanji that either has its own non-suspended card or
        appears in a non-suspended counter / kanji word.

    Three conditions are reported through ``message``:
      * kanji used in words that have no card of their own,
      * kanji cards that no word uses as an example,
      * kanji found in neither ``kanji_list`` nor the extra dictionary.
    The reported kanji are sorted so the output does not depend on set
    iteration order.
    """
    kanji_list = set(kanji_list)

    # Find out which kanji we actually have cards for
    expected = {kanji.kanji for kanji in Kanji.all() if not kanji.suspended}

    # Kanji words also get to add to the whitelist
    actual = set()
    for word in Counter.all() + KanjiWord.all():
        if word.suspended:
            continue

        # Add all the kanji in the word, skipping any kana
        actual.update(
            char for char in word.kanji if not kana.is_kana(char)
        )

    extra = load_extra(settings.EXTRA_DICT_KANJI)

    # Our known kanji list is everything seen on a card or inside a word
    known = expected | actual

    # Find which kanji we have no cards for
    missing = actual - expected
    if missing:
        message("Missing Kanji Found", ' '.join(sorted(missing)))

    # Notify the user of any kanji that don't have examples (no kanji-words)
    no_example = expected - actual
    if no_example:
        message("Kanji with no Examples", ' '.join(sorted(no_example)))

    # Notify the user of any kanji that aren't in our dictionary
    unknown = known - (kanji_list | extra)
    if unknown:
        message("Unknown Kanji, not in Dict:", ' '.join(sorted(unknown)))

    return known
Ejemplo n.º 5
0
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana

from collections import defaultdict
from itertools import islice

import settings


if __name__ == '__main__':
    force_UTF8()

    # Group the readings by base character: every reading entry of every
    # kanji word is appended to the list kept for its 'base'
    mapping = defaultdict(list)
    for word in KanjiWord.all():
        for reading in word.kanji_readings:
            readings_for_base = mapping[reading['base']]
            readings_for_base.append(reading['reading'])

    # Now remove any that are used
    # (the lookup / error reporting below is currently disabled)
    # try:
    #     kanji = Kanji.find(reading['base'])
    # except KeyError:
    #     if kana.is_kana(reading['base']):
    #         raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
    #             reading['base'], word.kanji, word.reading
    #         ))
    #     else:
    #         raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
    #             reading['base'], word.kanji
    #         ))
Ejemplo n.º 6
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from models.kanji import Kanji
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana

import settings


if __name__ == '__main__':
    force_UTF8()

    # First we need to read our whitelist: every kanji whose own card is
    # not suspended
    whitelist = {kanji.kanji for kanji in Kanji.all() if not kanji.suspended}

    # Now we filter out any KanjiWords that use other kanji.  Kana are
    # always acceptable; only a non-kana character missing from the
    # whitelist disqualifies a word.  (The previous check tested
    # `is_kana(...) and not in whitelist`, which rejected words for
    # containing kana and let unknown kanji through.)
    for kanji_word in KanjiWord.all():
        fine = all(
            kana.is_kana(char) or char in whitelist
            for char in kanji_word.kanji
        )

        if fine:
            kanji_word.mark_suspended(False)

Ejemplo n.º 7
0
                        type=int,
                        default=10,
                        help='The number of words to display')

    out = parser.parse_args(args)
    return out


if __name__ == '__main__':
    force_UTF8()

    args = parse()

    # Find all the kanji that are in the deck
    all_kanji = set()
    for word in KanjiWord.all():
        # word.kanji is iterated character by character
        all_kanji.update(word.kanji)
    for kanji in Kanji.all():
        # Store the character itself rather than the Kanji model object,
        # otherwise the `char in all_kanji` test below can never match it
        all_kanji.add(kanji.kanji)

    # Count which kanji the input data has
    data = Counter(unicode(sys.stdin.read()))
    # most_common() returns a separate list, so deleting from `data` while
    # looping is safe; Counter also ignores `del` of an absent key, so a
    # character matching both checks is not a problem
    for char, count in data.most_common():
        # we don't want kana
        if kana.is_kana(char):
            del data[char]
        # Nor do we want kanji we know
        if char in all_kanji:
            del data[char]
        # Nor any non-kanji chars
Ejemplo n.º 8
0
# File name of the "ignore unused readings" file; it is joined with DATA and
# handed to Kanji.setup() below
UNUSED_READINGS = 'unused.json'

# File name of the jlpt kanji file; it is joined with DATA and handed to
# jlpt.setup_jlpt() below
JLPT_PATH = 'jlpt_kanji.json'

# Paths for the Kanji dictionary files inside DATA
FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')
KANJI_DICT = os.path.join(DATA, 'kanjidic2_common.xml')
# Disabled alternative: point KANJI_DICT at the full dictionary instead
# KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')

# Html Output templates
DATA_HEADER = os.path.join(DATA, 'header.html')
DATA_CSS = os.path.join(DATA, 'main.css')

# Path of the extra dictionary kanji file inside DATA
EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json')

# Now we setup all the models
# NOTE(review): these imports sit below the constants, presumably because the
# model modules read this settings module while they are imported -- confirm
# before moving them to the top of the file.
from models.anki import AnkiModel
from models.kanji_word import KanjiWord
from models.kanji import Kanji

# Each setup() call receives the path of the file backing that model
AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB))
KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS))
Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS))

# Imported only after the models above have been set up
import jlpt

jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))
Ejemplo n.º 9
0
# Paths for the Kanji dictionary files inside DATA
FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')
KANJI_DICT      = os.path.join(DATA, 'kanjidic2_common.xml')
# Disabled alternative: point KANJI_DICT at the full dictionary instead
# KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')

# Html Output templates
DATA_HEADER = os.path.join(DATA, 'header.html')
DATA_CSS    = os.path.join(DATA, 'main.css')


# Path of the extra dictionary kanji file inside DATA
EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json')






# Now we setup all the models
# NOTE(review): these imports sit below the constants, presumably because the
# model modules read this settings module while they are imported -- confirm
# before moving them to the top of the file.
from models.anki import AnkiModel
from models.kanji_word import KanjiWord
from models.kanji import Kanji

# Each setup() call receives the path of the file backing that model
AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB))
KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS))
Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS))

# Imported only after the models above have been set up
import jlpt

jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))
Ejemplo n.º 10
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from models.kanji import Kanji
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana

import settings

if __name__ == '__main__':
    force_UTF8()

    # First we need to read our whitelist: every kanji whose own card is
    # not suspended
    whitelist = {kanji.kanji for kanji in Kanji.all() if not kanji.suspended}

    # Now we filter out any KanjiWords that use other kanji.  Kana are
    # always acceptable; only a non-kana character missing from the
    # whitelist disqualifies a word.  (The previous check tested
    # `is_kana(...) and not in whitelist`, which rejected words for
    # containing kana and let unknown kanji through.)
    for kanji_word in KanjiWord.all():
        fine = all(
            kana.is_kana(char) or char in whitelist
            for char in kanji_word.kanji
        )

        if fine:
            kanji_word.mark_suspended(False)