def workfile():
    print('working in file')
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        if len(line) < 5:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        terms = line.split('\t')
        if len(terms) == 1:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        word = terms[0].strip()

        google = terms[2]
        reverso = terms[3]
        yandex = terms[4]
        cambridge = terms[5]
        linguee = terms[6]

        yandex = accentuation_only_matching(yandex, google, reverso, cambridge,
                                            linguee)
        linguee = only_matching(linguee, [google, reverso, cambridge, yandex])

        yandex = remove_duplicated_terms(yandex)
        linguee = remove_duplicated_terms(linguee)

        line = f'{word}\t{yandex}\t{linguee}\n'

        workfiles.write_tmpfile(cnt, line, 'a')
    rd.close()
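
# remove_duplicated_terms(), only_matching() and accentuation_only_matching()
# come from the surrounding project and are not shown in this listing. For
# reference, a minimal sketch of remove_duplicated_terms(), assuming the field
# holds comma-separated translations (the real implementation may differ):
def remove_duplicated_terms_sketch(field):
    """Drop repeated comma-separated terms, keeping the first occurrence of each."""
    terms = [term.strip() for term in field.split(',') if term.strip()]
    return ','.join(dict.fromkeys(terms))
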
def rem_spell_synonyms():
    print('removing fields spell and synonyms')
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        line = line.replace('\n', '')
        fields = line.split('\t')
        newline = '\t'.join([fields[0], fields[1], fields[3], fields[4]]) + '\n'
        workfiles.write_tmpfile(cnt, newline, 'a')
    rd.close()
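
# A quick check of the column selection above, using made-up field values (the
# real tmp-file layout is not shown in this listing): column 2 and everything
# after column 4 are dropped, columns 0, 1, 3 and 4 are kept.
_example = 'word\tipa\tspelling\tword.mp3\tdefinition\tsynonyms'.split('\t')
assert '\t'.join([_example[0], _example[1], _example[3], _example[4]]) == \
    'word\tipa\tword.mp3\tdefinition'
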
def sound_mp3_directory():
    print('change the reference of sound in mp3 field')
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        line = line.replace('\n', '')
        fields = line.split('\t')
        fields[2] = workfiles.add_wordlist_dictionary_soundmp3(fields[2])
        newline = '\t'.join([fields[0], fields[1], fields[2], fields[3]]) + '\n'
        workfiles.write_tmpfile(cnt, newline, 'a')
    rd.close()
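
# workfiles.add_wordlist_dictionary_soundmp3() is defined in the project's
# workfiles module and is not shown here. A minimal sketch of one plausible
# reading, assuming it simply points the mp3 reference at a media directory
# (the directory name below is an assumption, not the project's actual value):
import posixpath

def add_wordlist_dictionary_soundmp3_sketch(mp3_field, media_dir='wordlist_dictionary'):
    """Rewrite a bare reference such as 'word.mp3' as 'wordlist_dictionary/word.mp3'."""
    return posixpath.join(media_dir, mp3_field) if mp3_field else mp3_field
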
def add_tabs():
    print('adding tabs')
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        if line.count('\t') == 1:
            line = line[:-1] + '\t\t\t\t\t\n'
        if line.count('\t') == 5:
            first_tab = line.find('\t')
            line = line[:first_tab] + '\t' + line[first_tab:]
        workfiles.write_tmpfile(cnt, line, 'a')
    rd.close()
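
# For illustration, with made-up content: a line holding a single tab is padded
# with five empty columns, and a line holding five tabs gets one extra empty
# column right after the head word, so both cases end up with six tabs.
_line = 'word\ttranslation\n'
_line = _line[:-1] + '\t\t\t\t\t\n'
assert _line.count('\t') == 6
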
def rem_definitions_duplicate():
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        # Walk to the fourth tab; the definitions column sits between the
        # fourth and fifth tabs.
        fourth_tab = line.find('\t')
        for _ in range(3):
            fourth_tab = line.find('\t', fourth_tab + 1)
        fifth_tab = line.find('\t', fourth_tab + 1)
        definitions = line[fourth_tab + 1:fifth_tab]
        # Drop repeated definitions while keeping their original order.
        list_uniq = list(dict.fromkeys(definitions.split(',')))
        line = line[:fourth_tab + 1] + ','.join(list_uniq) + line[fifth_tab:]
        workfiles.write_tmpfile(cnt, line, 'a')
    rd.close()
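
# The dedup idiom used above, shown on a made-up definitions column: splitting
# on commas and rebuilding through dict.fromkeys() keeps the first occurrence
# of each definition and preserves order.
assert ','.join(dict.fromkeys('noun,verb,noun'.split(','))) == 'noun,verb'
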
# Example #6
def workfile():
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        if len(line) < 5:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        terms = line.split('\t')
        if len(terms) == 1:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        word = terms[0].strip()
        babla = terms[7].strip()
        babla = only_word_flexion(word, babla)
        line = f'{word}\t{babla}\n'
        workfiles.write_tmpfile(cnt, line, 'a')
    rd.close()
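
# only_word_flexion() is defined elsewhere in the project. A minimal sketch,
# assuming the bab.la field holds comma-separated entries and that the helper
# keeps only the ones that look like inflections of the head word (the real
# rules may differ):
def only_word_flexion_sketch(word, field):
    """Keep only the comma-separated entries that start with `word`."""
    entries = [entry.strip() for entry in field.split(',') if entry.strip()]
    return ','.join(e for e in entries if e.lower().startswith(word.lower()))
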
def workfile():
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        if len(line) < 5:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        terms = line.split('\t')
        if len(terms) == 1:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        word = terms[0].strip()
        free = terms[15].strip()
        free = clean_infos(free)
        free = only_word_flexion(word, free)
        if len(free) > 99:
            free = ''
        line = f'{word}\t{free}\n'
        workfiles.write_tmpfile(cnt, line, 'a')
    rd.close()
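
# clean_infos() also lives in the surrounding project. A rough sketch, assuming
# it strips parenthesised/bracketed annotations and squeezes whitespace (the
# actual cleaning rules are not shown in this listing):
import re

def clean_infos_sketch(field):
    """Drop (...) and [...] annotations and collapse runs of whitespace."""
    field = re.sub(r'\([^)]*\)|\[[^\]]*\]', '', field)
    return re.sub(r'\s+', ' ', field).strip()
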
def workfile():
    rd = workfiles.read_lasttmp_or_lists()
    cnt = workfiles.new_tmpfile()
    while True:
        line = rd.readline()
        if not line:
            break
        if len(line) < 5:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        terms = line.split('\t')
        if len(terms) == 1:
            workfiles.write_tmpfile(cnt, line, 'a')
            continue
        word = terms[0].strip()

        babla = terms[1].strip()
        cambridge_def = terms[2]
        free_def = terms[3]
        mac_def = terms[4]
        google = terms[5]
        reverso = terms[6]
        yandex = terms[7]
        linguee = terms[8]
        cambridge_pt = terms[9]
        free_pt = terms[10].strip()

        babla = only_word_flexion(word, babla)

        cambridge_def = organize_definitions_with_br(cambridge_def)

        free_def = clean_lengthy_definitions(free_def)

        mac_def = clean_lengthy_definitions(mac_def)

        google = reorganizeTranslations(word, google)
        google = remove_lower_frequency(google)
        google = trim_all(google)
        google = remove_same_word(word, google)
        google = remove_same_translations_without_frequency(google)
        google = only_four_translations(google)
        google = remove_duplicated_terms_freq(google)

        reverso = trim_all(reverso)
        yandex = trim_all(yandex)
        linguee = trim_all(linguee)
        cambridge_pt = trim_all(cambridge_pt)
        cambridge_pt = remove_dirt(cambridge_pt)

        reverso = reverso.replace('"', '')
        reverso = remove_less_half_frequency(reverso)
        reverso = remove_same_word(word, reverso)
        reverso = only_four_translations(reverso)
        reverso = remove_duplicated_terms_freq(reverso)

        yandex = accentuation_only_matching(yandex, google, reverso,
                                            cambridge_pt, linguee)
        linguee = only_matching(linguee, google, reverso, cambridge_pt, yandex)

        yandex = remove_duplicated_terms(yandex)
        linguee = remove_duplicated_terms(linguee)

        free_pt = clean_infos(free_pt)
        free_pt = only_word_flexion_free(word, free_pt)
        free_pt = exclude_lengthy_translation(free_pt)

        line = (f'{word}\t{babla}\t{cambridge_def}\t{free_def}\t{mac_def}'
                f'\t{google}\t{reverso}\t{yandex}\t{linguee}\t{cambridge_pt}'
                f'\t{free_pt}\t\t\t\t\n')
        workfiles.write_tmpfile(cnt, line, 'a')
    rd.close()
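
# The line written above has 15 tab-separated columns: the eleven fields built
# in this function plus four empty trailing columns, apparently left free for
# later pipeline steps. trim_all(), remove_same_word(), only_four_translations()
# and the remaining helpers come from the surrounding project; as one example
# of what they might look like, a sketch of trim_all(), assuming it strips
# whitespace around every comma-separated entry (an assumption, not the
# project's actual code):
def trim_all_sketch(field):
    """Strip whitespace around each comma-separated entry of a field."""
    return ','.join(part.strip() for part in field.split(','))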