Ejemplo n.º 1
0
 def get_vocab_from_selector(selector_id):
     """Return the vocab list for the kanji selector *selector_id*, or None.

     Resolves the selector id to a kanji name first; if no such kanji
     exists, returns None instead of querying the word list.
     """
     db = Japanese_DB_handler()
     fmt = db.base_format
     row = db.get_item_by_id(fmt.kanjis, selector_id, fmt.kanjis.name)
     if not row:
         return None
     fields = www_config.get_vocab_format_including_id()
     return db.list_word_by_kanjis(row[0], *fields)
Ejemplo n.º 2
0
    def get_selector_name_from_id(cls, selector_id):
        """Resolve *selector_id* to its 'name' field in the table bound to
        cls.sub_url; returns None when the id is unknown."""
        db = Japanese_DB_handler()
        fmt = db.base_format

        row = db.get_item_by_id(fmt.get_table(cls.sub_url),
                                selector_id,
                                fmt.get_field(cls.sub_url, 'name'))
        return row[0] if row else None
Ejemplo n.º 3
0
def parse(inputFile, outputDir):
    """Parse a tab-separated vocab export and write new entries into
    batched CSV files (100 rows each) under *outputDir*.

    Rows already present in the DB are logged and skipped.  Malformed rows
    (fewer than two columns, or an empty word) are collected and written to
    a '_pottentialErrors' file instead of crashing the parse.
    """
    # getting configuration and BD.
    db_handler = Japanese_DB_handler()
    config_data = configuration.get_configuration()
    if not config_data:
        log.error("couldn't find get configuration data")
        return

    # Keep a backup of the raw input in the configured backup directory.
    copyfile(inputFile, _generateFileName(config_data.input_files_bk, 'input'))

    f = db_handler.base_format
    existing_kanjis = db_handler.list(f.vocab, f.vocab.word)

    potentialErrors = []
    newEntriesList = []

    # Parsing input file.
    with open(inputFile, 'r') as fin:
        for row in csv.reader(fin, delimiter='\t'):
            # The japanese word is forced as row[0]; meaning in row[1];
            # optional prononciation in row[2].
            # BUG FIX: short/empty rows previously raised IndexError and
            # potentialErrors was never populated; route them there instead.
            if len(row) < 2 or not row[0]:
                potentialErrors.append(row)
                continue

            word = row[0]
            meaning = row[1]
            prononciation = row[2] if len(row) > 2 and row[2] else ''
            exemple = ''

            if word not in existing_kanjis:
                newEntriesList.append(
                    ['', '', word, prononciation, meaning, exemple])
            else:
                log.error('already exists : ' + word)

    # ceil(len/100) output files of at most 100 entries each.
    nb_of_files = len(newEntriesList) // 100
    if len(newEntriesList) % 100 != 0:
        nb_of_files += 1

    outputDir += '/'
    for nb in range(1, nb_of_files + 1, 1):
        fileName = _generateFileName(outputDir, "int", str(nb))
        with open(fileName, 'w') as fout:
            writer = csv.writer(fout, delimiter='\t')
            writer.writerow(
                ['categorie', 'tag', 'word', 'prononciation', 'meaning',
                 'exemple'])
            for entry in newEntriesList[100 * (nb - 1):100 * nb]:
                writer.writerow(entry)

    fileName = _generateFileName(outputDir, "int", '_pottentialErrors')
    with open(fileName, 'w') as fout:
        writer = csv.writer(fout, delimiter='\t')
        for error in potentialErrors:
            writer.writerow(error)
            log.error(error)
    return
Ejemplo n.º 4
0
def hello():
    """Render the stats landing page: one 'list_test' template per stat list."""
    stats = Japanese_DB_handler().get_db_stat()
    chunks = []
    for list_name, (most_used_key, count_key) in stat_list_names.items():
        chunks.append(template('list_test',
                               rows=stats[most_used_key],
                               list_name=list_name,
                               number=stats[count_key]))
    return ''.join(chunks)
Ejemplo n.º 5
0
def categorie_page(categorie_id):
    """Render the 'categorie' template for *categorie_id*.

    Unknown categories render with name=None and an empty row tuple.
    """
    db = Japanese_DB_handler()
    fmt = db.base_format

    # Keep the id only if it actually exists in the DB.
    checked_cat_id = (categorie_id
                      if db.check_categorie_existence(categorie_id) else None)

    if checked_cat_id:
        vocab_rows = db.list_word_by_categorie(
            categorie_id,
            fmt.vocab.word, fmt.vocab.prononciation,
            fmt.vocab.meaning, fmt.vocab.example)
    else:
        vocab_rows = ()

    return template('categorie', name=checked_cat_id, rows=vocab_rows)
Ejemplo n.º 6
0
def list_cat_tag_from_csv_files(*input_file_list, log_info=False):
    """Scan CSV files and split the categories/tags found into those that
    already exist in the DB and those that are new.

    Returns (existing_cats, new_cats, existing_tags, new_tags) as sets.
    With log_info=True, each group is dumped through the module logger.
    """
    csv_all_cat = set()  # all cat found in csv
    csv_all_tag = set()  # all tag found in csv

    jpDB = Japanese_DB_handler()
    f = jpDB.base_format

    jpDB_cat = set(jpDB.select(f.categories, f.categories.name))
    jpDB_tag = set(jpDB.select(f.tags, f.tags.name))

    for input_file in input_file_list:
        found_cat, found_tag = _list_cat_tag_process_single_file(input_file)
        csv_all_cat.update(found_cat)
        csv_all_tag.update(found_tag)

    csv_existing_cat = csv_all_cat & jpDB_cat
    csv_existing_tag = csv_all_tag & jpDB_tag

    csv_new_cat = csv_all_cat - csv_existing_cat
    csv_new_tag = csv_all_tag - csv_existing_tag

    if log_info:
        _log_separator('categories')
        log.info('1 : existing categories : ')
        _log_set(csv_existing_cat)
        log.info('          ')
        # BUG FIX: this section logs the NEW categories, but the label
        # wrongly said "existing categories".
        log.info('2 : new categories : ')
        _log_set(csv_new_cat)
        log.info('          ')
        _log_separator('tags')
        log.info('1 : existing tags : ')
        _log_set(csv_existing_tag)
        log.info('          ')
        # BUG FIX: same mislabel for the new tags section.
        log.info('2 : new tags : ')
        _log_set(csv_new_tag)
        log.info('          ')

    return csv_existing_cat, csv_new_cat, csv_existing_tag, csv_new_tag
Ejemplo n.º 7
0
def words_page():
    """Build the full 'words' page: header, add-word form and vocab table."""
    fields = www_config.get_vocab_format_including_id()
    vocab_list = Japanese_DB_handler().list_all_words(*fields)

    page_name = application_title + ", words"

    body = header_kioku()
    body += create_add_word_page()
    body += list_vocabulary(www_config.get_vocab_format_as_string(),
                            vocab_list)

    return page_base_structure(page_name, main_css, body)
Ejemplo n.º 8
0
def add_word_status(method='GET'):
    """Add a single word taken from the GET query parameters and render a
    status page for the result.

    Reads word/prononciation/meaning/example/categorie/tag from request.GET.
    """
    # BUG FIX: removed a leftover debug print and the dead commented-out
    # request.forms block (the handler reads request.GET, not form data).
    status = Japanese_DB_handler().add_single_word(
        request.GET.word, request.GET.prononciation, request.GET.meaning,
        request.GET.example, request.GET.categorie, request.GET.tag)

    name = application_title + ': add new word, status'
    css_file = main_css

    body = header_kioku()
    body += template('add_word_status', request.GET.word, status)

    data = page_base_structure(name, css_file, body)
    return data
Ejemplo n.º 9
0
def _add_vocab_fromCsv_dir(csv_file_list, add_categories, add_tags):
    """Load vocab rows from *csv_file_list* into the DB.

    Optionally registers the categories/tags found first.  Rows whose tag
    contains "ERROR" are returned instead of inserted; a repeated header
    row (tag == 'tag') is skipped.

    Returns (status, error_entries).
    """
    cat_in_csv = set()
    tag_in_csv = set()
    vocab_entries = set()

    error_entries = []

    fields = [
        'categorie', 'tag', 'word', 'prononciation', 'meaning', 'example'
    ]
    # BUG FIX: a _get_delimiter() call here was dead code — its result was
    # unconditionally shadowed by the hard-coded tab on the next line.
    delimiter = "\t"

    for file in csv_file_list:
        with open(file, 'r') as csv_file:
            reader = csv.DictReader(csv_file,
                                    fieldnames=fields,
                                    delimiter=delimiter)
            for row in reader:
                # BUG FIX: DictReader yields None for missing columns, and
                # `"ERROR" in None` raises TypeError — normalise to ''.
                tag = row['tag'] or ''
                if tag == 'tag':  # repeated header line
                    continue
                if "ERROR" in tag:
                    error_entries.append(_format_row(row))
                else:
                    cat_in_csv.add(row['categorie'])
                    tag_in_csv.add(tag)
                    vocab_entries.add(_format_row(row))

    jpDb = Japanese_DB_handler()

    if add_categories:
        jpDb.add_categories(*tuple(cat_in_csv), silent=True)
    if add_tags:
        jpDb.add_tags(*tuple(tag_in_csv), silent=True)
    status = jpDb.add_vocab(*tuple(vocab_entries))

    return status, error_entries
Ejemplo n.º 10
0
 def get_word_data(word_id):
     """Fetch the full info record for the word with id *word_id*."""
     return Japanese_DB_handler().get_word_info(word_id)
Ejemplo n.º 11
0
 def get_selector_list_data():
     """Return (categories ordered by usage incl. ids, total category count)."""
     db = Japanese_DB_handler()
     selectors = db.list_categorie_by_usage(include_id=True)
     total = db.count(db.base_format.categories)
     return selectors, total
Ejemplo n.º 12
0
import os
import sys
import csv
import logging

from japanese.Japanese_DB_handler import Japanese_DB_handler
import configuration as configuration

# BUG FIX: `logging` was used below without ever being imported,
# so the script crashed with NameError at startup.
logging.basicConfig()
log = logging.getLogger()
log.setLevel(logging.DEBUG)

# Validate the output directory passed as the first CLI argument.
output_dir = sys.argv[1]
if not os.path.exists(output_dir):
    log.error('directory not found : ' + str(output_dir))
    sys.exit(1)

# DB handle and its table/field descriptor.
jpDB = Japanese_DB_handler()
f = jpDB.base_format
config_data = configuration.get_configuration()

# Abort early when configuration is missing.
if not config_data:
    log.error("couldn't find get configuration data")
    sys.exit(1)

# Accumulators filled by the loops below — presumably keyed by
# category/tag name (TODO confirm against the rest of the script).
cat_dir = {}
tag_dir = {}

# Categories and tags, ordered by usage count.
cat_list = jpDB.list_categorie_by_usage()
tag_list = jpDB.list_tag_by_usage()
for cat, _ in cat_list:
    if not cat: continue
Ejemplo n.º 13
0
def categories_page():
    """Render the full list of categories ordered by usage."""
    rows = Japanese_DB_handler().list_categorie_by_usage()
    return template('full_list', rows=rows, list_name='categories')
Ejemplo n.º 14
0
 def update_name(orig_name, new_name):
     """Rename the category *orig_name* to *new_name*; returns the DB status."""
     db = Japanese_DB_handler()
     return db.edit_cat(orig_name, new_name)
Ejemplo n.º 15
0
 def get_selector_list_data():
     """Return (core prononciations by usage incl. ids, their total count)."""
     db = Japanese_DB_handler()
     return (db.list_core_p_by_usage(include_id=True),
             db.count(db.base_format.core_prononciations))
Ejemplo n.º 16
0
def stat_test():
    """Return the 'most_used_categories' stat as a stringified dict.

    Debug/inspection endpoint over the DB statistics.
    """
    # BUG FIX: removed leftover debug print() calls that dumped the DB
    # handler and its base_format to stdout on every request.
    jpDB = Japanese_DB_handler()
    stat_dict = jpDB.get_db_stat()
    return str(dict(stat_dict['most_used_categories']))
Ejemplo n.º 17
0
def _get_DB_format():
    """Lazily initialise and return the module-level db_format singleton."""
    global db_format
    if not db_format:
        db_format = Japanese_DB_handler().base_format
    return db_format
Ejemplo n.º 18
0
def parse(inputFile, outputDir):
    """Parse a tab-separated Japanese/French vocab export.

    Splits each japanese cell into word / prononciation / example, writes
    new entries into batched CSV files (100 rows each) under *outputDir*,
    and saves rows that look like full sentences to a potential-errors file.
    """
    # getting configuration and BD.
    db_handler = Japanese_DB_handler()
    config_data = configuration.get_configuration()
    if not config_data:
        log.error("couldn't find get configuration data")
        return

    # Back up the raw input file before processing.
    copyfile(inputFile, _generateFileName(config_data.input_files_bk, 'input'))

    f = db_handler.base_format
    existing_kanjis = db_handler.list(f.vocab, f.vocab.word)

    potentialErrors = []
    newEntriesList = []

    # Parsing input file.
    with open(inputFile, 'r') as fin:
        for row in csv.reader(fin, delimiter='\t'):
            # usefull to just get half of the list
            # but question are not necessrely before awnser
            # we forced japanese as row[0]

            # BUG FIX: guard empty/short rows before indexing row[0][0]
            # and row[1], which previously raised IndexError.
            if len(row) < 2 or not row[0]:
                continue

            if not _is_cjk(row[0][0]):
                continue

            japanese = row[0]
            french = row[1]

            # 3 cases :
            # 1, juste kana
            # 2, a bunch of kanji and kana prononciation
            # 3, 2 + a sentence exemple.

            # 1) no kanjis
            if ' ' not in japanese:
                word = japanese
                prononciation = ''
                exemple = ''

            else:
                potentialKanjis, afterKanjis = japanese.split(' ', 1)

                # remove trailing spaces.
                afterKanjis = _delTrailingSpaces(afterKanjis)

                # Fold する / な suffixes back into the word.
                if afterKanjis[:2] == 'する':
                    potentialKanjis += ' (する)'
                    afterKanjis = _delTrailingSpaces(afterKanjis[2:])

                if afterKanjis[:2] == 'な ':
                    potentialKanjis += ' (な)'
                    afterKanjis = _delTrailingSpaces(afterKanjis[1:])

                # x) Potentials errors : Full phrase.
                # BUG FIX: previously only a `status` flag was set here and
                # control fell through to the "already exists" log with
                # `word` possibly unbound (NameError on the first row) or
                # stale from a previous iteration; skip the row instead.
                if len(potentialKanjis) > 7:
                    log.error('potential error :' + potentialKanjis)
                    potentialErrors.append(row)
                    continue

                # 2) just kanjis and prononciation
                elif ' ' not in afterKanjis:
                    word = potentialKanjis
                    prononciation = _delTrailingSpaces(afterKanjis)
                    exemple = ''

                # 3) kanjis prononciation and exemple
                else:
                    word = potentialKanjis
                    prononciation, exemple = afterKanjis.split(' ', 1)
                    prononciation = _delTrailingSpaces(prononciation)
                    exemple = _delTrailingSpaces(exemple)

            if word not in existing_kanjis:
                newEntriesList.append(
                    ['', '', word, prononciation, french, exemple])
            else:
                log.error('already exists : ' + word)

    # ceil(len/100) output files of at most 100 entries each.
    nb_of_files = len(newEntriesList) // 100
    if len(newEntriesList) % 100 != 0:
        nb_of_files += 1

    outputDir += '/'
    for nb in range(1, nb_of_files + 1, 1):
        fileName = _generateFileName(outputDir, "int", str(nb))
        with open(fileName, 'w') as fout:
            writer = csv.writer(fout, delimiter='\t')
            writer.writerow([
                'categorie', 'tag', 'word', 'prononciation', 'meaning',
                'exemple'
            ])
            for entry in newEntriesList[100 * (nb - 1):100 * nb]:
                writer.writerow(entry)

    fileName = _generateFileName(outputDir, "int", '_pottentialErrors')
    with open(fileName, 'w') as fout:
        writer = csv.writer(fout, delimiter='\t')
        for error in potentialErrors:
            writer.writerow(error)
            log.error(error)
    return
Ejemplo n.º 19
0
def _jpdb():
    """Return the lazily-created module-level Japanese_DB_handler singleton."""
    global _jpdb_object
    if not _jpdb_object:
        _jpdb_object = Japanese_DB_handler()
    return _jpdb_object