Example 1
def zip_dict(dst_fname):
    dir_fpath, basename = os.path.split(dst_fname)
    root_dir, dir_fname = os.path.split(dir_fpath)

    # using zip does not work: it breaks ColorDict, i.e. the app starts blinking endlessly
    #fmt = "zip"
    fmt = "gztar"

    # if the dictionary data sits directly in the archive rather than in a folder,
    # the CSS/images are not found
    save_without_folder = False  # True #
    if save_without_folder:
        fname = shutil.make_archive(o_p.join(revo_dicts_fpath, dir_fname), fmt, dir_fpath)
    else:
        fname = shutil.make_archive(o_p.join(revo_dicts_fpath, dir_fname), fmt, root_dir, base_dir=dir_fname)

    ifo_fname = os.path.splitext(dst_fname)[0] + ".ifo"
    with open(ifo_fname) as ifo_f:
        properties = {}
        for line in ifo_f:
            lst = line.split("=")
            if len(lst) >= 2:
                key, value = lst[0].strip(), lst[1].strip()
                if key and value:
                    properties[key] = value

    words_cnt = int(properties.get("wordcount"))
    synwordcount = properties.get("synwordcount")
    if synwordcount:
        words_cnt += int(synwordcount)
    fname = os.path.basename(fname)
    # the two trailing spaces are for a Markdown line break
    print("http://new.bombono.org/download/revo/%(fname)s\t%(words_cnt)s  " % locals())
Example 2
def install(rpkg_tar_gz, dir_fpath):
    assert os.path.isdir(dir_fpath)
    def set_current(to_last):
        src = "last" if to_last else "previous"
        # :TRICKY: to overwrite an existing symlink, -f and -n are needed together -
        # a *nix artifact
        call_cmd("ln -s -f -n %s current" % src, cwd=dir_fpath)
    
    last_fpath = o_p.join(dir_fpath, "last")
    curr_fpath = o_p.join(dir_fpath, "current")
    if os.path.exists(last_fpath):
        curr_exists = False
        if os.path.exists(curr_fpath):
            assert os.path.islink(curr_fpath)
            curr_exists = True
        
        prev_fpath = o_p.join(dir_fpath, "previous")
        if os.path.exists(prev_fpath):
            shutil.rmtree(prev_fpath)
            
        shutil.move(last_fpath, prev_fpath)
        if curr_exists:
            set_current(False)
        
    os.mkdir(last_fpath)
    
    call_cmd("tar -xzf %(rpkg_tar_gz)s -C %(last_fpath)s" % locals())
    set_current(True)
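A sketch of the rotation install() performs (paths are hypothetical):

    install("rpkg.tar.gz", "/srv/app")
    # /srv/app/last      - the freshly unpacked package
    # /srv/app/previous  - what "last" held before
    # /srv/app/current   - symlink, re-pointed at "last" when done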
Example 3
def append_national_article(lang, names, txt):
    o_p_article, dst_fname = dictionaries.get(lang, (None, None))
    if o_p_article is None:
        dict_fpath = o_p.join(dct_prefix, "REVO_%(lang)s" % locals())
        # :REFACTOR:
        dst_fname = o_p.join(dict_fpath, "REVO-%(lang)s.txt" % locals())
        o_p_article = create_dictionary(dst_fname, "revo-traduko.css")
        dictionaries[lang] = o_p_article, dst_fname

        copy_prj_fname("sample/revo/eo-nacia/revo-traduko.css", o_p.join(dict_fpath, "res/revo-traduko.css"))

    o_p_article(names, txt)
Example 4
def make_rel2abs(rel_fpath, stack_depth=1):
    """
    Построить абсолютный путь относительно пути модуля
    Значение stack_depth такое же, как и для get_mdir()
    """ 
    dir_fpath = get_mdir(stack_depth+1)
    return o_p.join(dir_fpath, rel_fpath)
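A hypothetical usage, assuming get_mdir() returns the directory of the calling module:

    # resolves "data/config.json" next to the module that calls make_rel2abs()
    cfg_path = make_rel2abs("data/config.json")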
Example 5
def request_chunk(rtp_db, startstamp):
    payload = b''

    # the list of chunks with exactly this UTC stamp (one higher no longer matches)
    names = request_names(rtp_db, startstamp, 1)

    if names:
        with open(o_p.join(rtp_db2dir(rtp_db), names[0]), "rb") as f:
            payload = f.read()

    return payload
Example 6
def create_vip_vortaro(dst_fname, remove_srcfile=True):
    
    use_external_css = False
    css_txt = read_all_file(o_p.join(project_dir_fname, "sample/vip/vip.css"))
    
    def parse_dictionary(on_parsed_article):
        src_fdir = make_dpath("articles")
        
        # :REFACTOR:
        for fname in os.listdir(src_fdir):
            txt = read_all_file(os.path.join(src_fdir, fname))
            
            all_names = []
            vip_href_prefix = "?w="
            fixed_text = []
            php_len = len(vip_href_prefix)
            def on_article(article):
                all_names.extend(get_search_words(article))
                
                for a in iter_tags(article, "a"):
                    href = a.get("href")
                    assert href.startswith(vip_href_prefix)
                    # bword hint:
                    # http://siripong-english.blogspot.ru/2011/04/custom-dictionary-w-stardict-in-babylon.html
                    a.set("href", "bword://" + href[php_len:])
                    
                fixed_text.append(gen_html_text(article))
            process_vip_text(txt, on_article)
            
            txt = fixed_text[0]
            css_txt  # bare reference keeps css_txt in locals() for the commented-out variant below
            article_txt = txt  # "<style>%(css_txt)s</style>%(txt)s" % locals()
            on_parsed_article(all_names, article_txt)
    
    make_kondratjev.make_dictionary_ex(parse_dictionary, dst_fname, css_text=None if use_external_css else css_txt,
                                    remove_srcfile=remove_srcfile, is_html=True)
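A sketch of the href rewrite that on_article performs above ("hundo" is a made-up query word):

    href = "?w=hundo"
    print("bword://" + href[len("?w="):])  # -> bword://hundo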
Example 7
def make_path(name):
    return o_p.join(os.path.dirname(__file__), name)
Example 8
        pat = re.compile(r"^#EXT-X-STREAM-INF.+BANDWIDTH=(?P<bandwidth>\d+).*(?:\n|\r\n?)(?P<stream>.+)", re.MULTILINE)
        dst_fname = {
            "m3u8": "vlc.m3u8",
            "html": "tv_bl.html",
            "xspf": "vlc.xspf"
        }[fmt]

        req_clns = ["ts_port"]
        for cnxt in rewrite_channels(dst_fname, req_clns, fmt=fmt):
            # :TRICKY: there is no dedicated column for this
            hls_idx = cnxt.clns["ts_port"] + 1
            url = cnxt.row[hls_idx]
            
            if url.startswith("http://"):
                print(name, url)
                try:
                    with contextlib.closing(get_url.get_url(url)) as pf:
                        txt = pf.read()
                except get_url.URLError:
                    pass
                else:
                    max_bw, max_url = 0, None
                    for m in pat.finditer(txt):
                        bw = int(m.group('bandwidth'))
                        if not max_bw or max_bw < bw:
                            max_bw = bw
                            max_url = m.group('stream')
                    assert max_url
                    max_url = o_p.join(os.path.dirname(url), max_url)
                    write_channel(cnxt, max_url)
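A self-contained sketch of how the regex picks the highest-bandwidth variant; the master playlist text is made up:

    import re

    pat = re.compile(r"^#EXT-X-STREAM-INF.+BANDWIDTH=(?P<bandwidth>\d+).*(?:\n|\r\n?)(?P<stream>.+)",
                     re.MULTILINE)
    txt = ("#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1280000\n"
           "low/index.m3u8\n"
           "#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2560000\n"
           "high/index.m3u8\n")
    best = max(pat.finditer(txt), key=lambda m: int(m.group("bandwidth")))
    print(best.group("stream"))  # -> high/index.m3u8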
Esempio n. 12
0
            re.MULTILINE)
        dst_fname = {
            "m3u8": "vlc.m3u8",
            "html": "tv_bl.html",
            "xspf": "vlc.xspf"
        }[fmt]

        req_clns = ["ts_port"]
        for cnxt in rewrite_channels(dst_fname, req_clns, fmt=fmt):
            # :TRICKY: своей колонки нет
            hls_idx = cnxt.clns["ts_port"] + 1
            url = cnxt.row[hls_idx]

            if url.startswith("http://"):
                print(name, url)
                try:
                    with contextlib.closing(get_url.get_url(url)) as pf:
                        txt = pf.read()
                except get_url.URLError:
                    pass
                else:
                    max_bw, max_url = 0, None
                    for m in pat.finditer(txt):
                        bw = int(m.group('bandwidth'))
                        if not max_bw or max_bw < bw:
                            max_bw = bw
                            max_url = m.group('stream')
                    assert max_url
                    max_url = o_p.join(os.path.dirname(url), max_url)
                    write_channel(cnxt, max_url)
Esempio n. 13
0
def make_path(name):
    return o_p.join(os.path.dirname(__file__), name)
Example 9
def open_xml_article(xml_fname):
    xml_fname = o_p.join(xml_fpath, xml_fname)
    tree = open_xml_tree(xml_fname)
    return tree
Example 10
def main():
    dct_prefix = "/home/ilya/.stardict/dic/esperanto"
    dst_fname = o_p.join(dct_prefix, "REVO_Eksplika/Eksplika-REVO.txt")

    # :REFACTOR:
    dirname = os.path.dirname
    # :REFACTOR: realpath() - apply this to all usages
    prj_fdir = dirname(os.path.realpath(__file__))
    import shutil

    def copy_prj_fname(prj_fname, dst_fpath):
        o_p.force_makedirs(os.path.dirname(dst_fpath))
        shutil.copy(o_p.join(prj_fdir, prj_fname), dst_fpath)

    prefix_eoru = dirname(dirname(prj_fdir))
    unpacked_revo = o_p.join(prefix_eoru, "stuff/revo/revo")

    dictionaries = {}
    with make_gen_accumulator() as add_gen:

        def create_dictionary(dst_fname, css_link=None):
            remove_srcfile = True  # False #
            on_article = add_gen(
                make_kondratjev.dictionary_generator(
                    dst_fname,
                    css_text=None,
                    is_html=True,
                    remove_srcfile=remove_srcfile))
            if css_link:
                orig_on_article = on_article

                def on_article(key_names, txt):
                    css_link  # bare reference so css_link shows up in locals() below
                    txt = """<link href="%(css_link)s" rel="stylesheet" type="text/css" />%(txt)s""" % locals(
                    )
                    return orig_on_article(key_names, txt)

            return on_article

        on_explika_article = create_dictionary(dst_fname, "revo.css")

        res_fdir = o_p.join(dirname(dst_fname), "res")
        copy_prj_fname("sample/revo/revo.css", o_p.join(res_fdir, "revo.css"))
        # copy the images for prettiness
        dst_smb = o_p.join(res_fdir, "smb")
        if not o_p.exists(dst_smb):
            shutil.copytree(o_p.join(unpacked_revo, "smb"), dst_smb)

        xml_fpath = o_p.join(unpacked_revo, "xml")

        def open_xml_article(xml_fname):
            xml_fname = o_p.join(xml_fpath, xml_fname)
            tree = open_xml_tree(xml_fname)
            return tree

        def fname2prefix(src_fname):
            return o_p.without_ext(src_fname)

        prefix_dct = {}

        def get_words(prefix):
            words = prefix_dct.get(prefix)
            if words is None:

                words = prefix_dct[prefix] = []
                tree = open_xml_article(prefix + ".xml")

                for drv, headwords in for_drv_words_headwords(tree):
                    words.extend(calc_words(headwords))

                    #print(words)
                    #print(rvut_definitions.get_translations(drv).get("de"))
                    #print()

            return words

        fname_lst = os.listdir(xml_fpath)
        if False:  # True: #
            fname_lst = [
                "ten.xml",
                "distin.xml",
                "apenau.xml",  # <trd> in <subdrv>
                "pri.xml",  # article without <drv>
                "sur.xml",  # <ekz> not in <(sub)snc> but in <subart>
                "al.xml",  # <trdgrp> directly in <art>
                "stift.xml",  # and <ekz> also in <art>
                "lima.xml",  # the translation was attached to <kap> although it sat inside the text (the 'la' culprit) - and there are many such articles
                "kverk.xml",  # assorted human errors
                "jxak1.xml",
                "anim.xml",  # <ekz> without <ind>
                "blank.xml",  #
                "milv.xml",  # <bld> instead of <ekz>
                "hel.xml",  # trdgrp in <dif>
                "hazard.xml",  # empty trd tag
                "iks.xml",  # words containing the '|' character
            ]

        for src_fname in fname_lst:
            prefix = fname2prefix(src_fname)
            all_names = get_words(prefix)

            html_fname = o_p.join(unpacked_revo, "art", prefix + ".html")
            body = make_wells.get_html_body(html_fname, False)

            h1 = body.find("h1")
            hr = body.find("hr")

            div = etree.Element("div")
            # append() moves each element out of body, so h1.getnext()
            # keeps yielding the next remaining sibling until hr is reached
            el = h1.getnext()
            while el != hr:
                div.append(el)
                el = h1.getnext()

            def append_sub(name):
                sub_el = body.find("div[@class='%(name)s']" % locals())
                if sub_el is not None:
                    div.append(etree.Element("hr"))
                    div.append(sub_el)

            append_sub("fontoj")
            append_sub("notoj")

            # rewrite references of the form
            # kapt.html#kapt.0i => bword://kapti#kapt.0i
            for lnk in parse_vip.iter_tags(div, "a"):
                href = lnk.get("href")
                if href:
                    m = re.match(r"(?P<lnk_fname>[^/]+\.html)#(?P<anchor>.+)$",
                                 href)
                    if m:
                        lnk_fname, anchor = m.group("lnk_fname"), m.group(
                            "anchor")
                        lnk_word = get_words(fname2prefix(lnk_fname))[0]
                        # GD ignores #anchor, ColorDict even refuses to follow the link
                        #lnk.set("href", "bword://%(lnk_word)s#%(anchor)s" % locals())
                        lnk.set("href",
                                "bword://%(lnk_word)s#%(anchor)s" % locals())

            # :REFACTOR:
            for img in parse_vip.iter_tags(div, "img"):
                src = img.get("src")
                if src:
                    # same behavior for Goldendict (GD) and ColorDict (CD)
                    m = re.match(r"^\.\./", src)
                    if m:
                        img.set("src", src[3:])

            txt = parse_vip.gen_html_text(div)
            #print(txt)
            on_explika_article(all_names, txt)

            # eo-national dictionary
            national_dct = {}
            tree = open_xml_article(src_fname)

            def append_translations(translations, src_trs):
                for lang, lst in src_trs.items():
                    translations[lang] = lst + translations.setdefault(
                        lang, [])

            used_tr_nodes = {}
            national_headwords = {}

            def get_count_translations(node):
                res = rvut_definitions.get_translations(node)
                # hazard.xml has empty translations
                clean_res = {}
                for lang, lst in res.items():
                    lst = list(filter(bool, lst))
                    if lst:
                        clean_res[lang] = lst
                res = clean_res

                append_translations(national_headwords, res)

                # :REFACTOR:
                for trd in node.findall('trd'):
                    used_tr_nodes[trd] = True

                for trdp in node.findall('trdgrp'):
                    used_tr_nodes[trdp] = True

                    for trd in trdp.findall('trd'):
                        used_tr_nodes[trd] = True

                return res

            def iterate_translations(translations, sub_node_dct,
                                     numerator_func, final_sep):
                for lang in sub_node_dct.keys() | translations.keys():
                    yield lang, gen_trans_text(lang, sub_node_dct,
                                               numerator_func, translations,
                                               final_sep)

            def notify_node(warning_txt, node):
                print(warning_txt, src_fname, parse_vip.gen_html_text(node))

            # :TRICKY: the simplest way to track nodes that have already been processed
            ekz_node_set = set()

            def find_ekz_translations(ekz_dct, node, flat_translations):
                #for trd in parse_vip.iter_tags(node, "ekz/trd|trdgrp"):
                def trd_iter(ekz_name, name):
                    return parse_vip.iter_tags(
                        node, "%(ekz_name)s/%(name)s" % locals())

                def trd_iters(ekz_name):
                    return trd_iter(ekz_name,
                                    "trd"), trd_iter(ekz_name, "trdgrp")

                for trd in itertools.chain(*(trd_iters("ekz") +
                                             trd_iters("bld"))):
                    ekz = trd.getparent()

                    if ekz in ekz_node_set:
                        continue
                    else:
                        ekz_node_set.add(ekz)

                    def make_orig_txt(ind_node):
                        return ', '.join(
                            rvut_words.get_words_from_kap(ind_node))

                    ind_node = ekz.find('ind')
                    if ind_node is None:
                        # compute orig_txt ourselves, collecting all tags until a trd or trdgrp appears
                        # anim.xml:
                        # <ekz>
                        #  <tld/>ita parolado<fnt>K</fnt>,
                        #  <trd lng="hu">lelkes besz&eacute;d</trd>
                        # </ekz>
                        ind_node = etree.Element("ind")
                        ind_node.text = ekz.text
                        for child in ekz.getchildren():
                            if child.tag in ["trd", "trdgrp"]:
                                break
                            else:
                                child = copy.deepcopy(child)
                                ind_node.append(child)

                        tree.append(ind_node)
                        orig_txt = make_orig_txt(ind_node)
                        ind_node.getparent().remove(ind_node)
                    else:
                        orig_txt = make_orig_txt(ind_node)

                    for lang, tr_lst in get_count_translations(ekz).items():
                        # :REFACTOR:
                        lst = ekz_dct.setdefault(lang, [])

                        tr_lst = ", ".join(tr_lst)
                        ekz_txt = "<i><b>%(orig_txt)s</b>: %(tr_lst)s</i>" % locals(
                        )
                        lst.append(ekz_txt)

                #return

                # :TRICKY: some <trd> combine the translation itself with a note of the original name (Latin, mostly) =>
                # only <trd> can be like that, not <trdgrp>, since the latter is a grouping tag
                # wrong - hel.xml!
                rest_translations = {}
                for trd in parse_vip.iter_tags(node, "trd"):
                    if trd not in used_tr_nodes:
                        par_node = trd.getparent()
                        if par_node.tag == "trdgrp":
                            lang = par_node.get("lng")

                            used_tr_nodes[par_node] = True
                        else:
                            lang = trd.get("lng")

                        foreign_word = rvut_flatten.flatten_node(trd)
                        if foreign_word:
                            # :REFACTOR:
                            rest_translations.setdefault(
                                lang, []).append(foreign_word)
                        # :REFACTOR:
                        used_tr_nodes[trd] = True
                append_translations(flat_translations, rest_translations)
                append_translations(national_headwords, rest_translations)

            def append_ekz_translations(dct, ekz_dct):
                # :TRICKY: to keep subsnc_dct simple and not change iterate_translations,
                # just append the examples to the last item of subsnc_dct
                for lang, ekz_lst in ekz_dct.items():
                    ekz_txt = "; ".join(ekz_lst)
                    lst = dct.setdefault(lang, [])
                    if lst:
                        lst[-1] += "; " + ekz_txt
                    else:
                        lst.append(ekz_txt)

            def append_national_article(lang, names, txt):
                o_p_article, dst_fname = dictionaries.get(lang, (None, None))
                if o_p_article is None:
                    dict_fpath = o_p.join(dct_prefix,
                                          "REVO_%(lang)s" % locals())
                    # :REFACTOR:
                    dst_fname = o_p.join(dict_fpath,
                                         "REVO-%(lang)s.txt" % locals())
                    o_p_article = create_dictionary(dst_fname,
                                                    "revo-traduko.css")
                    dictionaries[lang] = o_p_article, dst_fname

                    copy_prj_fname(
                        "sample/revo/eo-nacia/revo-traduko.css",
                        o_p.join(dict_fpath, "res/revo-traduko.css"))

                o_p_article(names, txt)

            def append_row(translations, snc_dct, headwords, drv):
                # sur.xml: <ekz> can occur outside any <snc>
                ekz_dct = {}
                find_ekz_translations(ekz_dct, drv, translations)
                append_ekz_translations(translations, ekz_dct)

                assert headwords
                hw_txt = "<b>%s</b>" % "</b>, <b>".join(headwords)

                typ = None
                vspec = drv.find("gra/vspec")
                if vspec is not None:
                    typ = vspec.text

                if typ:
                    hw_txt = "%(hw_txt)s <i>%(typ)s</i>" % locals()

                for lang, tr_txt in iterate_translations(
                        translations, snc_dct, arab_num, " <b>|</b> "):
                    opa_args = national_dct.setdefault(lang, ([], []))

                    names, txt = opa_args
                    names.extend(calc_words(headwords))

                    row_txt = """<div class="paragrafo">%(hw_txt)s %(tr_txt)s</div>""" % locals(
                    )
                    txt.append(row_txt)

                    # national-eo article
                    n_keywords = national_headwords.get(lang)
                    assert n_keywords
                    # the original n_keywords must be cleared afterwards, not the modified copy
                    #n_keywords = [word.replace("|", "/") for word in n_keywords]
                    clean_keywords = [
                        word.replace("|", "/") for word in n_keywords
                    ]
                    append_national_article(lang, clean_keywords, row_txt)
                    n_keywords.clear()

            for drv, headwords in for_drv_words_headwords(tree):
                #print(src_fname)
                #print(translations)
                #print()

                def latin_num(i):
                    return "%(chr(ord('a') + i))s)" % s_.EvalFormat()

                snc_dct = {}
                ekz_snc_dct = {}
                for snc in parse_vip.iter_tags(drv, "snc"):
                    subsnc_dct = {}
                    ekz_subsnc_dct = {}
                    for subsnc in parse_vip.iter_tags(snc, "subsnc"):
                        subsnc_translations = get_count_translations(subsnc)
                        for lang, tr_lst in subsnc_translations.items():
                            lst = subsnc_dct.setdefault(lang, [])
                            lst.append(", ".join(tr_lst))

                        find_ekz_translations(ekz_subsnc_dct, subsnc,
                                              subsnc_dct)

                    append_ekz_translations(subsnc_dct, ekz_subsnc_dct)
                    for lang, tr_txt in iterate_translations(
                            get_count_translations(snc), subsnc_dct, latin_num,
                            "; "):
                        lst = snc_dct.setdefault(lang, [])
                        lst.append(tr_txt)

                    find_ekz_translations(ekz_snc_dct, snc, snc_dct)

                def arab_num(i):
                    return "<b>%(i+1)s.</b>" % s_.EvalFormat()

                append_ekz_translations(snc_dct, ekz_snc_dct)

                def merge_trs(translations, drv):
                    src_trs = get_count_translations(drv)
                    append_translations(translations, src_trs)

                # there are still a few <subdrv> inside <drv> => add them
                translations = {}
                for subdrv in parse_vip.iter_tags(drv, "subdrv"):
                    merge_trs(translations, subdrv)
                # and a subart can also carry <trd> directly - sur.xml: hu => rá-
                merge_trs(translations, drv)

                append_row(translations, snc_dct, headwords, drv)

            # :TRICKY: al.xml has translations outside subart, drv
            art_node = tree.find("art")
            append_row(get_count_translations(art_node), {},
                       find_kap_words(art_node), art_node)

            for lang, opa_args in national_dct.items():
                names, txt = opa_args
                append_national_article(lang, names, "".join(txt))

            strict_check = False  # True #

            def alarm_not_processed(trd):
                is_ok = trd in used_tr_nodes

                if not is_ok:
                    if strict_check:
                        assert is_ok
                    else:
                        notify_node("Not processed trd:", trd.getparent())

            # check that all nodes have been processed
            for trd in parse_vip.iter_tags(tree, "trd"):
                alarm_not_processed(trd)
            for trd in parse_vip.iter_tags(tree, "trdgrp"):
                alarm_not_processed(trd)

    # zip the dictionaries
    revo_dicts_fpath = o_p.join(dirname(unpacked_revo), "revo-dicts")
    o_p.force_makedirs(revo_dicts_fpath)
    # shutil can produce zip archives itself!
    #import zipfile

    print("\nAtingeblaj REVO vortaroj:")

    def zip_dict(dst_fname):
        dir_fpath, basename = os.path.split(dst_fname)
        root_dir, dir_fname = os.path.split(dir_fpath)

        # using zip does not work: it breaks ColorDict, i.e. the app starts blinking endlessly
        #fmt = "zip"
        fmt = "gztar"

        # if the dictionary data sits directly in the archive rather than in a folder, the CSS/images are not found
        save_without_folder = False  # True #
        if save_without_folder:
            fname = shutil.make_archive(o_p.join(revo_dicts_fpath, dir_fname),
                                        fmt, dir_fpath)
        else:
            fname = shutil.make_archive(o_p.join(revo_dicts_fpath, dir_fname),
                                        fmt,
                                        root_dir,
                                        base_dir=dir_fname)

        ifo_fname = os.path.splitext(dst_fname)[0] + ".ifo"
        with open(ifo_fname) as ifo_f:
            properties = {}
            for line in ifo_f:
                lst = line.split("=")
                if len(lst) >= 2:
                    key, value = lst[0].strip(), lst[1].strip()
                    if key and value:
                        properties[key] = value

        words_cnt = int(properties.get("wordcount"))
        synwordcount = properties.get("synwordcount")
        if synwordcount:
            words_cnt += int(synwordcount)
        fname = os.path.basename(fname)
        # the two trailing spaces are for a Markdown line break
        print(
            "http://new.bombono.org/download/revo/%(fname)s\t%(words_cnt)s  " %
            locals())

    zip_dict(dst_fname)
    for lang, (func, dst_fname) in dictionaries.items():
        zip_dict(dst_fname)
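For context, shutil.make_archive keeps or drops the wrapping folder depending on how it is called, which is exactly what the save_without_folder switch above toggles (paths are hypothetical):

    # with base_dir: entries start with REVO_de/, so relative CSS/image paths resolve
    shutil.make_archive("/out/REVO_de", "gztar", "/dics", base_dir="REVO_de")
    # root_dir only: the files land at the top level of the archive
    shutil.make_archive("/out/REVO_de", "gztar", "/dics/REVO_de")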
Example 11
def main():
    prefix = "/home/ilya/.stardict/dic/esperanto"
    make_wells(15, 42, o_p.join(prefix, "wells-eo-en/wells-eo-en.txt"))
    make_wells(44, 69, o_p.join(prefix, "wells-en-eo/wells-en-eo.txt"))
Example 12
        type=bool, default=False,
    )
    
    parser.add_argument("src_path", help="where is ffmpeg/libav source directory")
    args = parser.parse_args()
    
    is_libav = args.is_libav
    src_dir  = os.path.abspath(args.src_path)
    if is_libav:
        add_opts = ""
    else:
        if args.is_debug:
            add_opts = "--disable-stripping"
        else:
            add_opts = ""
    bld_dir = o_p.join(src_dir, "objs")

    if os.path.exists(bld_dir):
        import shutil
        shutil.rmtree(bld_dir)
        
    o_p.force_makedirs(bld_dir)
    inst_dir = o_p.join(bld_dir, "inst")
    
    enable_x264 = True
    x264_support = "--enable-libx264 --enable-gpl" if enable_x264 else ""
    if x264_support:
        x264_support = " " + x264_support
    
    debug_opts = "--disable-optimizations --extra-cflags='-O0 -g' " if args.is_debug else ""
    call_cmd("../configure --prefix=%(inst_dir)s %(debug_opts)s--extra-ldflags='-Wl,-rpath=%(inst_dir)s/lib' \
Example 13
    def parse_dictionary(on_parsed_article):
        dirname = os.path.dirname
        prefix_eoru = dirname(dirname(dirname(__file__)))
        unpacked_epub = o_p.join(prefix_eoru, "stuff/Wells/decrypted/depacked")

        for num in range(first_num, last_num + 1):
            #num = 29 # K
            src_fname = o_p.join(unpacked_epub, "OEBPS/%03d.html" % num)
            body = get_html_body(src_fname, True)

            found_empty_p = False
            # alternative approach: .getchildren() + checking .tag
            for p in body.iterfind(xhtml_tag("p")):
                txt = p.text_content().strip()

                if found_empty_p and txt:
                    # the next article
                    #print(txt)

                    radix = None
                    lst = []

                    def on_word(word):
                        # <b>Kaboverd/o</b><b> </b>Cape Verde
                        if word:
                            lst.append(word)

                    key_elements = list(parse_vip.iter_tags(p, xhtml_tag("b")))
                    assert key_elements

                    for idx, el in enumerate(key_elements):
                        bold_txt = el.text_content().strip()
                        exceptions = [
                            "li diris, ~ ŝi atendas",  # ke
                            "~e, ke",  # kondiĉo 
                        ]

                        # "2" - kluso
                        def is_number(txt):
                            res = True
                            try:
                                int(txt)
                            except ValueError:
                                res = False
                            return res

                        if bold_txt in exceptions or is_number(bold_txt):
                            w_lst = []  # [bold_txt]
                        else:
                            w_lst = [w.strip() for w in bold_txt.split(",")]

                        def remove_bad_suffix(w):
                            for suffix in [
                                    ":",  # boarding:
                                    " 1",  # can 1
                            ]:
                                if w.endswith(suffix):
                                    w = w[:-len(suffix)]
                            return w

                        # only the first word is the root
                        # kost/i, ~o cost; multe~a expensive
                        if radix is None:
                            radix = w_lst[0]
                            slash = radix.find("/")
                            if slash >= 0:
                                radix = radix[:slash]

                            radix = remove_bad_suffix(radix)

                        for w in w_lst:
                            for no_tilda_pattern in [
                                    "(aerarmea) generalo",  # air
                                    "koncerne (with accus)",  # as
                                    "~ on daŭri",  # run
                            ]:
                                if idx != 0 and w.find("~") == -1 and txt.find(
                                        no_tilda_pattern) != -1:
                                    w = "~ " + w

                            # :TRICKY: some terms contain " ~ ", but the correct value
                            # cannot be substituted without manual analysis:
                            # - lav/i wash tr; ~ sin get washed, wash (oneself)
                            # - est/i be; ~as (there) is/are; kio ~ al vi? what's the matter? [skip]
                            w = w.replace("/", "").replace("~", radix)

                            # Kaliforni/o California; ≈o californium
                            change_case = w.find("≈") >= 0
                            if change_case:
                                w = w.replace("≈", radix)
                                # :REFACTOR:
                                w = w[0].swapcase() + w[1:]

                            # digital/o 1 digitalis, foxglove; 2 ~a img2.png digital [= cifereca]
                            if w.startswith("2 "):
                                w = w[2:]
                            w = remove_bad_suffix(w)

                            # Prote/o Proteus; ≈a protean; ≈o 1 protea (flower); 2 olm (amphibian)
                            # errors that need to be fixed upstream
                            if w in ['a', 'o']:
                                continue

                            if w == 'la' and txt.find(
                                    "da is not used before la, other") != -1:
                                continue

                            make_kondratjev.with_x_hdw(w, on_word)

                        is_first = False

                    assert lst
                    #print(lst)
                    on_parsed_article(lst, parse_vip.gen_html_text(p))  # txt)

                if not txt:
                    found_empty_p = True
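A minimal sketch of the tilde expansion handled above, using the "kost/i, ~o cost" entry from the comments:

    radix = "kost"                                 # from "kost/i": the text before "/"
    w = "~o".replace("/", "").replace("~", radix)  # -> "kosto"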
Example 14
def make_fpath(fname):
    return o_p.join(fdir, fname)
Example 15
def setup(cont_name, keyfile, image, need_install_python=False, **kwargs):
    with open(keyfile) as f:
        pub_key = f.read()

    # add the ansible_plugins directory to the list of places to search
    # for extra plugins (must happen before run_api())
    from ansible import utils
    import o_p
    import os
    get_par = os.path.dirname
    utils.plugins.push_basedir(
        o_p.join(get_par(get_par(__file__)), "ansible_plugins"))

    import ans_module
    labels = {"disvolvu": "test"}
    # configure docker without with_sudo=True
    res = ans_module.run_api("docker",
                             name=cont_name,
                             image=image,
                             labels=labels,
                             docker_api_version="auto",
                             **kwargs)
    assert res

    client = create_dclient()
    cont = find_container(cont_name, client)
    assert cont
    ip_addr = update_ssh_config.get_ipaddr(cont, client)
    #print(ip_addr)

    # remove a possibly stale previous fingerprint
    known_hosts = os.path.expanduser("~/.ssh/known_hosts")
    run_command("""ssh-keygen -f "%(known_hosts)s" -R %(ip_addr)s""" %
                locals())

    # add the host's public SSH key to known_hosts so that ssh
    # does not complain during a non-interactive connection
    from ansible.module_utils.known_hosts import add_git_host_key

    # :TRICKY: set up a fake module
    from ansible.module_utils.basic import AnsibleModule
    ans_module.setup_module_arguments(None)
    fake_module = AnsibleModule({})

    # :KLUDGE: write a proper function that does not fake a git address
    add_git_host_key(fake_module, "git@%(ip_addr)s/" % locals())

    #
    standard_password = "******"

    if need_install_python:
        run_command(
            "sshpass -p %(standard_password)s ssh root@%(ip_addr)s apt-get -y install python"
            % locals())

    # on Ubuntu the python package is already present, so
    # straight to the point - authorized_key
    run_module([
        {
            "host": ip_addr,
            "settings": {
                "ansible_ssh_user": "******",
                "ansible_ssh_pass": "******"
            },
        },
    ],
               "authorized_key",
               '',
               user="******",
               key=pub_key)

    update_ssh_config.main()
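A hypothetical invocation (container name, key path, and image are made up):

    setup("disvolvu-test", os.path.expanduser("~/.ssh/id_rsa.pub"), "ubuntu:16.04")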
Example 16
        parser.add_argument(name, help=help)

    add_arg("src_file_dir", "source file directory")
    add_arg("src_basename", "source file basename (w/o extension)")
    add_arg("build_dir", "build directory for make")
    args = parser.parse_args()
    #print args

    is_eq = True
    ln = min(len(args.build_dir), len(args.src_file_dir))
    for i in range(ln):
        if args.build_dir[i] != args.src_file_dir[i]:
            eq_len = i
            is_eq = False
            break

    if is_eq:
        eq_len = ln

    import o_p
    path = o_p.join(args.src_file_dir[eq_len:], args.src_basename + ".o")

    args = "/usr/bin/make", [path]
    print("Running:", *args)
    # Qt Creator needs the flush, otherwise "Running:" would not show up
    import sys
    sys.stdout.flush()

    import call_cmd
    call_cmd.exec_process(*args, run_new_process=False)
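The character-by-character prefix scan above computes the same value as os.path.commonprefix; a sketch:

    import os.path
    eq_len = len(os.path.commonprefix([args.build_dir, args.src_file_dir]))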
Example 17
def copy_prj_fname(prj_fname, dst_fpath):
    o_p.force_makedirs(os.path.dirname(dst_fpath))
    shutil.copy(o_p.join(prj_fdir, prj_fname), dst_fpath)
Example 18
def make_dpath(*fname):
    #download_data_fdir = o_p.join(project_dir_fname, "download_data")
    
    assert download_data_fdir, "call setup_download_data() first"
    return o_p.join(download_data_fdir, *fname)
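A hypothetical usage, assuming setup_download_data() pointed download_data_fdir at "/tmp/dl":

    path = make_dpath("articles", "a.html")  # -> /tmp/dl/articles/a.html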
Esempio n. 30
0
def main():
    dct_prefix = "/home/ilya/.stardict/dic/esperanto"
    dst_fname = o_p.join(dct_prefix, "REVO_Eksplika/Eksplika-REVO.txt")
    
    # :REFACTOR:
    dirname = os.path.dirname
    # :REFACTOR: realpath() - apliki tion al ĉiuj uzoj
    prj_fdir = dirname(os.path.realpath(__file__))
    import shutil
    def copy_prj_fname(prj_fname, dst_fpath):
        o_p.force_makedirs(os.path.dirname(dst_fpath))
        shutil.copy(o_p.join(prj_fdir, prj_fname), dst_fpath)
    
    prefix_eoru = dirname(dirname(prj_fdir))
    unpacked_revo = o_p.join(prefix_eoru, "stuff/revo/revo")

    dictionaries = {}
    with make_gen_accumulator() as add_gen:
        def create_dictionary(dst_fname, css_link=None):
            remove_srcfile = True # False # 
            on_article = add_gen(make_kondratjev.dictionary_generator(dst_fname, css_text=None, is_html=True, remove_srcfile=remove_srcfile))
            if css_link:
                orig_on_article = on_article
                def on_article(key_names, txt):
                    css_link
                    txt = """<link href="%(css_link)s" rel="stylesheet" type="text/css" />%(txt)s""" % locals()
                    return orig_on_article(key_names, txt)
            return on_article
        on_explika_article = create_dictionary(dst_fname, "revo.css")
        
        res_fdir = o_p.join(dirname(dst_fname), "res")
        copy_prj_fname("sample/revo/revo.css", o_p.join(res_fdir, "revo.css"))
        # kopias figurojn por beleco
        dst_smb = o_p.join(res_fdir, "smb")
        if not o_p.exists(dst_smb):
            shutil.copytree(o_p.join(unpacked_revo, "smb"), dst_smb)

        xml_fpath = o_p.join(unpacked_revo, "xml")
        def open_xml_article(xml_fname):
            xml_fname = o_p.join(xml_fpath, xml_fname)
            tree = open_xml_tree(xml_fname)
            return tree
        
        def fname2prefix(src_fname):
            return o_p.without_ext(src_fname)
        
        prefix_dct = {}
        def get_words(prefix):
            words = prefix_dct.get(prefix)
            if words is None:
                
                words = prefix_dct[prefix] = []
                tree = open_xml_article(prefix + ".xml")

                for drv, headwords in for_drv_words_headwords(tree):
                    words.extend(calc_words(headwords))

                    #print(words)
                    #print(rvut_definitions.get_translations(drv).get("de"))
                    #print()
                    
            return words
        
        fname_lst = os.listdir(xml_fpath)
        if False: # True: # 
            fname_lst = [
                "ten.xml", 
                "distin.xml",
                "apenau.xml", # <trd> in <subdrv>
                "pri.xml",    # artikolo sen <drv>
                "sur.xml",    # <ekz> ne en <(sub)snc>, sed en <subart>
                "al.xml",     # <trdgrp> ĝuste en <art>
                "stift.xml",  # kaj <ekz> ankaŭ en <art>
                
                "lima.xml", # перевод относился к <kap>, хотя был внутри текста (гад с 'la') - и таких статей много
                
                "kverk.xml",  # diversaj homaj eraroj
                "jxak1.xml",
                
                "anim.xml",   # <ekz> sen <ind>
                "blank.xml",  #
                
                "milv.xml",   # <bld> anstataŭ <>ekz
                
                "hel.xml",    # trdgrp en <dif>
                "hazard.xml", # malplena trd etikedo
                "iks.xml",    # vortoj kun signo '|'
            ]
        
        for src_fname in fname_lst:
            prefix = fname2prefix(src_fname)
            all_names = get_words(prefix)
                    
            html_fname = o_p.join(unpacked_revo, "art", prefix + ".html")
            body = make_wells.get_html_body(html_fname, False)
            
            h1 = body.find("h1")
            hr = body.find("hr")
            
            div = etree.Element("div")
            el = h1.getnext()
            while el != hr:
                div.append(el)
                el = h1.getnext()
                
            def append_sub(name):
                sub_el = body.find("div[@class='%(name)s']" % locals())
                if not(sub_el is None):
                    div.append(etree.Element("hr"))
                    div.append(sub_el)
                    
            append_sub("fontoj")
            append_sub("notoj")
            
            # renovigas referencojn en stilo 
            # kapt.html#kapt.0i => bword://kapti#kapt.0i
            for lnk in parse_vip.iter_tags(div, "a"):
                href = lnk.get("href")
                if href:
                    m = re.match(r"(?P<lnk_fname>[^/]+\.html)#(?P<anchor>.+)$", href)
                    if m:
                        lnk_fname, anchor = m.group("lnk_fname"), m.group("anchor")
                        lnk_word = get_words(fname2prefix(lnk_fname))[0]
                        # GD ne atentas #anchor, ColorDict - eĉ rifuzas sekvi la ligilon
                        #lnk.set("href", "bword://%(lnk_word)s#%(anchor)s" % locals())
                        lnk.set("href", "bword://%(lnk_word)s#%(anchor)s" % locals())

            # :REFACTOR:
            for img in parse_vip.iter_tags(div, "img"):
                src = img.get("src")
                if src:
                    # egala funkciado por Goldendict (GD) k ColorDict (CD)
                    m = re.match(r"^\.\./", src)
                    if m:
                        img.set("src", src[3:])
                
            txt = parse_vip.gen_html_text(div)
            #print(txt)
            on_explika_article(all_names, txt)
            
            # eo-nacia vortaro
            national_dct = {}
            tree = open_xml_article(src_fname)

            def append_translations(translations, src_trs):
                for lang, lst in src_trs.items():
                    translations[lang] = lst + translations.setdefault(lang, [])
         
            used_tr_nodes = {}
            national_headwords = {}
            def get_count_translations(node):
                res = rvut_definitions.get_translations(node)
                # hazard.xml havas malplena tradukojn
                clean_res = {}
                for lang, lst in res.items():
                    lst = list(filter(bool, lst))
                    if lst:
                        clean_res[lang] = lst
                res = clean_res
                
                append_translations(national_headwords, res)
                
                # :REFACTOR:
                for trd in node.findall('trd'):
                    used_tr_nodes[trd] = True

                for trdp in node.findall('trdgrp'):
                    used_tr_nodes[trdp] = True

                    for trd in trdp.findall('trd'):
                        used_tr_nodes[trd] = True
                
                return res
            
            def iterate_translations(translations, sub_node_dct, numerator_func, final_sep):
                for lang in sub_node_dct.keys() | translations.keys():
                    yield lang, gen_trans_text(lang, sub_node_dct, numerator_func, translations, final_sep)
                    
            def notify_node(warning_txt, node):
                print(warning_txt, src_fname, parse_vip.gen_html_text(node))
                    
            # :TRICKY: plej simpla maniero por kalkuki jam traktitajn nodojn
            ekz_node_set = set()
            def find_ekz_translations(ekz_dct, node, flat_translations):
                #for trd in parse_vip.iter_tags(node, "ekz/trd|trdgrp"):
                def trd_iter(ekz_name, name):
                    return parse_vip.iter_tags(node, "%(ekz_name)s/%(name)s" % locals())
                def trd_iters(ekz_name):
                    return trd_iter(ekz_name, "trd"), trd_iter(ekz_name, "trdgrp")
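                # scan <trd> and <trdgrp> directly under both <ekz> and <bld> examples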
                for trd in itertools.chain(*(trd_iters("ekz") + trd_iters("bld"))):
                    ekz = trd.getparent()
                    
                    if ekz in ekz_node_set:
                        continue
                    else:
                        ekz_node_set.add(ekz)

                    def make_orig_txt(ind_node):
                        return ', '.join(rvut_words.get_words_from_kap(ind_node))
                    
                    ind_node = ekz.find('ind')
                    if ind_node is None:
                        # build orig_txt ourselves, collecting every tag until the first
                        # trd or trdgrp appears; anim.xml:
                        # <ekz>
                        #  <tld/>ita parolado<fnt>K</fnt>,
                        #  <trd lng="hu">lelkes besz&eacute;d</trd>
                        # </ekz>
                        ind_node = etree.Element("ind")
                        ind_node.text = ekz.text
                        for child in ekz:
                            if child.tag in ["trd", "trdgrp"]:
                                break
                            else:
                                child = copy.deepcopy(child)
                                ind_node.append(child)
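                        # attach the synthetic node to the tree temporarily, presumably so
                        # that get_words_from_kap() sees a parented node, then detach it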
                                
                        tree.append(ind_node)
                        orig_txt = make_orig_txt(ind_node)
                        ind_node.getparent().remove(ind_node)
                    else:
                        orig_txt = make_orig_txt(ind_node)
                    
                    for lang, tr_lst in get_count_translations(ekz).items():
                        # :REFACTOR:
                        lst = ekz_dct.setdefault(lang, [])
                        
                        tr_lst = ", ".join(tr_lst)
                        ekz_txt = "<i><b>%(orig_txt)s</b>: %(tr_lst)s</i>" % locals()
                        lst.append(ekz_txt)
                    
                #return
                
                # :TRICKY: some <trd> combine the translation itself with the original
                # name (mostly Latin) => only <trd> can be like that, not <trdgrp>,
                # since the latter is just a grouping tag - wrong, see hel.xml!
                rest_translations = {}
                for trd in parse_vip.iter_tags(node, "trd"):
                    if trd not in used_tr_nodes:
                        par_node = trd.getparent()
                        if par_node.tag == "trdgrp":
                            lang = par_node.get("lng")
                            
                            used_tr_nodes[par_node] = True
                        else:
                            lang = trd.get("lng")
                        
                        foreign_word = rvut_flatten.flatten_node(trd)
                        if foreign_word:
                            # :REFACTOR:
                            rest_translations.setdefault(lang, []).append(foreign_word)
                        # :REFACTOR:
                        used_tr_nodes[trd] = True
                append_translations(flat_translations, rest_translations)
                append_translations(national_headwords, rest_translations)
            
            def append_ekz_translations(dct, ekz_dct):
                # :TRICKY: to keep subsnc_dct simple and avoid changing
                # iterate_translations, the examples are simply appended to the
                # last entry of subsnc_dct
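                # e.g. dct == {"de": ["Haus"]} and ekz_dct == {"de": ["<i>...</i>"]}
                # end up as {"de": ["Haus; <i>...</i>"]}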
                for lang, ekz_lst in ekz_dct.items():
                    ekz_txt = "; ".join(ekz_lst)
                    lst = dct.setdefault(lang, [])
                    if lst:
                        lst[-1] += "; " + ekz_txt
                    else:
                        lst.append(ekz_txt)

            def append_national_article(lang, names, txt):
                o_p_article, dst_fname = dictionaries.get(lang, (None, None))
                if o_p_article is None:
                    dict_fpath = o_p.join(dct_prefix, "REVO_%(lang)s" % locals())
                    # :REFACTOR:
                    dst_fname = o_p.join(dict_fpath, "REVO-%(lang)s.txt" % locals())
                    o_p_article = create_dictionary(dst_fname, "revo-traduko.css")
                    dictionaries[lang] = o_p_article, dst_fname
                    
                    copy_prj_fname("sample/revo/eo-nacia/revo-traduko.css", o_p.join(dict_fpath, "res/revo-traduko.css"))
                    
                o_p_article(names, txt)

            def append_row(translations, snc_dct, headwords, drv):
                # sur.xml: <ekz> may appear outside of any <snc>
                ekz_dct = {}
                find_ekz_translations(ekz_dct, drv, translations)
                append_ekz_translations(translations, ekz_dct)
                 
                assert headwords
                hw_txt = "<b>%s</b>" % "</b>, <b>".join(headwords)
                
                typ = None
                vspec = drv.find("gra/vspec")
                if vspec is not None:
                    typ = vspec.text
                    
                if typ:
                    hw_txt = "%(hw_txt)s <i>%(typ)s</i>" % locals()
                
                for lang, tr_txt in iterate_translations(translations, snc_dct, arab_num, " <b>|</b> "):
                    opa_args = national_dct.setdefault(lang, ([], []))
                        
                    names, txt = opa_args
                    names.extend(calc_words(headwords))
                    
                    row_txt = """<div class="paragrafo">%(hw_txt)s %(tr_txt)s</div>""" % locals()
                    txt.append(row_txt)
                    
                    # nacia-eo (national-to-Esperanto) article
                    n_keywords = national_headwords.get(lang)
                    assert n_keywords
                    # the original n_keywords must be cleared afterwards, not a modified copy
                    #n_keywords = [word.replace("|", "/") for word in n_keywords]
                    clean_keywords = [word.replace("|", "/") for word in n_keywords]
                    append_national_article(lang, clean_keywords, row_txt)
                    n_keywords.clear()

            for drv, headwords in for_drv_words_headwords(tree):
                #print(src_fname)
                #print(translations)
                #print()
                
                def latin_num(i):
                    return "%(chr(ord('a') + i))s)" % s_.EvalFormat()
                snc_dct = {}
                ekz_snc_dct = {}
                for snc in parse_vip.iter_tags(drv, "snc"):
                    subsnc_dct = {}
                    ekz_subsnc_dct = {}
                    for subsnc in parse_vip.iter_tags(snc, "subsnc"):
                        subsnc_translations = get_count_translations(subsnc)
                        for lang, tr_lst in subsnc_translations.items():
                            lst = subsnc_dct.setdefault(lang, [])
                            lst.append(", ".join(tr_lst))
                            
                        find_ekz_translations(ekz_subsnc_dct, subsnc, subsnc_dct)
                        
                    append_ekz_translations(subsnc_dct, ekz_subsnc_dct)
                    for lang, tr_txt in iterate_translations(get_count_translations(snc), subsnc_dct, latin_num, "; "):
                        lst = snc_dct.setdefault(lang, [])
                        lst.append(tr_txt)
                        
                    find_ekz_translations(ekz_snc_dct, snc, snc_dct)
                    
                def arab_num(i):
                    return "<b>%(i+1)s.</b>" % s_.EvalFormat()

                append_ekz_translations(snc_dct, ekz_snc_dct)
                
                def merge_trs(translations, drv):
                    src_trs = get_count_translations(drv)
                    append_translations(translations, src_trs)
                    
                # there are still some <subdrv> inside <drv> => add those too
                translations = {}
                for subdrv in parse_vip.iter_tags(drv, "subdrv"):
                    merge_trs(translations, subdrv)
                # <subart> as well: <trd> may sit directly in subart - sur.xml: hu => rá-
                merge_trs(translations, drv)

                append_row(translations, snc_dct, headwords, drv)

            # :TRICKY: al.xml has translations outside of subart and drv
            art_node = tree.find("art")
            append_row(get_count_translations(art_node), {}, find_kap_words(art_node), art_node)
            
            for lang, opa_args in national_dct.items():
                names, txt = opa_args
                append_national_article(lang, names, "".join(txt))
                
            strict_check = False # True # 
            def alarm_not_processed(trd):
                is_ok = trd in used_tr_nodes
                
                if not is_ok:
                    if strict_check:
                        assert is_ok
                    else:
                        notify_node("Not processed trd:", trd.getparent())

            # verify that every node was processed
            for trd in parse_vip.iter_tags(tree, "trd"):
                alarm_not_processed(trd)
            for trd in parse_vip.iter_tags(tree, "trdgrp"):
                alarm_not_processed(trd)
    
    # zip up the dictionaries
    revo_dicts_fpath = o_p.join(dirname(unpacked_revo), "revo-dicts")
    o_p.force_makedirs(revo_dicts_fpath)
    # shutil can do this instead of zipfile!
    #import zipfile
        
    print("\nAtingeblaj REVO vortaroj:")
    
    def zip_dict(dst_fname):
        dir_fpath, basename = os.path.split(dst_fname)
        root_dir, dir_fname = os.path.split(dir_fpath)
        
        # using zip does not work - it breaks ColorDict, i.e. the latter starts
        # blinking endlessly
        #fmt = "zip"
        fmt = "gztar"
        
        # if the dictionary data sits directly in the archive instead of inside
        # a folder, the CSS/images are not found
        save_without_folder = False # True # 
        if save_without_folder:
            fname = shutil.make_archive(o_p.join(revo_dicts_fpath, dir_fname), fmt, dir_fpath)
        else:
            fname = shutil.make_archive(o_p.join(revo_dicts_fpath, dir_fname), fmt, root_dir, base_dir=dir_fname)
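        # make_archive returns the full path of the archive it created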
        
        ifo_fname = os.path.splitext(dst_fname)[0] + ".ifo"
        with open(ifo_fname) as ifo_f:
            properties = {}
            for line in ifo_f:
                lst = line.split("=")
                if len(lst) >= 2:
                    key, value = lst[0].strip(), lst[1].strip()
                    if key and value:
                        properties[key] = value
        
        # wordcount should always be present in a StarDict .ifo; default to 0 defensively
        words_cnt = int(properties.get("wordcount", 0))
        synwordcount = properties.get("synwordcount")
        if synwordcount:
            words_cnt += int(synwordcount)
        fname = os.path.basename(fname)
        # the two trailing spaces are for a Markdown line break
        print("http://new.bombono.org/download/revo/%(fname)s\t%(words_cnt)s  " % locals())
        
    zip_dict(dst_fname)
    for lang, (func, dst_fname) in dictionaries.items():
        zip_dict(dst_fname)
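
A note on the make_archive calls in zip_dict above: passing root_dir together with base_dir keeps the dictionary folder itself as the top-level entry of the archive, which is what lets the relative res/... CSS and image paths resolve after unpacking. A minimal sketch of the difference, using a hypothetical revo_de layout:

import shutil

# hypothetical layout: /tmp/dicts/revo_de/ holding REVO-de.ifo and res/revo-traduko.css
# (assumes /tmp/out already exists)

# archive keeps the enclosing folder -> unpacks as revo_de/res/... (CSS found)
shutil.make_archive("/tmp/out/revo_de", "gztar",
                    root_dir="/tmp/dicts", base_dir="revo_de")

# archive holds only the folder's contents -> res/ lands at the top level and
# apps that expect the enclosing dictionary folder miss the CSS
shutil.make_archive("/tmp/out/revo_de-flat", "gztar", "/tmp/dicts/revo_de")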