Beispiel #1
0
    def _select_concepts (self):
        """
        Select and order the concepts to present.

        A concept is selected if it has at least one term in the
        pivotal language and environment (``self._lang``/``self._env``).
        Returns the selected concept objects as a list, sorted
        lexicographically by concept key in the pivotal language.
        """

        gloss, lang, env = self._gloss, self._lang, self._env

        # Select concepts to present by having a term in pivotal langenv,
        # collecting (ckey, concept) links directly for ordering.
        # (Original code bound `gloss` but then re-read self._gloss, and
        # built an intermediate dict only to flatten it again.)
        ordering_links = []
        for ckey, concept in gloss.concepts.iteritems():
            if lang in concept.term.langs() and env in concept.term.envs(lang):
                ordering_links.append((ckey, concept))

        # Sort presentable concepts by concept key.
        langsort_tuples(ordering_links, 0, lang)

        return [concept for ckey, concept in ordering_links]
Beispiel #2
0
    def __call__ (self, gloss):
        """
        Render the glossary as a Gettext PO view and write it out.

        Concepts having at least one term in both the origin and target
        language (within the resolved environment) become PO messages:
        origin terms joined by '|' in msgid, target terms in msgstr, and
        the concept key in msgctxt. Descriptions and declensions go into
        '#' auto-comments. Output is written to the file named in the
        options, or to standard output.
        """

        # Resolve languages and environment.
        # error() is assumed to abort; execution does not continue past it.
        olang = self._options.olang
        if olang not in gloss.languages:
            error(p_("error message",
                     "origin language '%(lang)s' not present in the glossary")
                    % dict(lang=olang))
        tlang = self._options.tlang
        if tlang not in gloss.languages:
            error(p_("error message",
                     "target language '%(lang)s' not present in the glossary")
                    % dict(lang=tlang))
        env = self._options.env or gloss.env[0]
        if env is not None and env not in gloss.environments:
            error(p_("error message",
                     "environment '%(env)s' not defined by the glossary")
                  % dict(env=env))

        # Formatters for resolving glossary into plain text.
        # tft: bare terms; tfds: wrapped descriptions as '#' comments;
        # tfdl: one-line declensions as '#' comments.
        tft = TextFormatterPlain(gloss, lang=tlang, env=env)
        s_desc = p_("message comment in the PO view: "
                    "short label preceeding a concept description",
                    "desc: ")
        cpref = "# "
        tfds = TextFormatterPlain(gloss, lang=tlang, env=env,
                                  first_indent=(cpref + s_desc),
                                  indent=(cpref + " " * len(s_desc)),
                                  wcol=79)
        s_decl = p_("message comment in the PO view: "
                    "short label preceeding a declination",
                    "decl: ")
        tfdl = TextFormatterPlain(gloss, lang=tlang, env=env,
                                  prefix=(cpref + s_decl))

        # Select all concepts which have a term in both langenvs.
        # Collect terms from the origin language for lexicographical ordering.
        concepts = {}
        ordering_links = []
        for ckey, concept in gloss.concepts.iteritems():
            oterms = concept.term(olang, env)
            tterms = concept.term(tlang, env)
            if oterms and tterms:
                concepts[ckey] = concept
                # Use first of the synonymous origin terms for ordering.
                # Must format it to plain text beforehand.
                ordering_links.append((tft(oterms[0].nom.text).lower(), ckey))

        if not concepts:
            warning(p_("warning message",
                       "no concepts found for PO view that have terms in both "
                       "the requested origin and target language"))

        langsort_tuples(ordering_links, 0, olang)

        if self._options.condesc:
            # Collect keys of all concepts which have same terms for different
            # concepts, in either of the languages.
            all_ckeys_by_term = {}
            for ckey, concept in concepts.iteritems():
                aterms = (  concept.term(olang, env)
                          + concept.term(tlang, env))
                for term in aterms:
                    nomstr = tft(term.nom.text)
                    if nomstr not in all_ckeys_by_term:
                        all_ckeys_by_term[nomstr] = []
                    all_ckeys_by_term[nomstr].append(ckey)
            conflicted = {}
            for nomstr, ckeys in all_ckeys_by_term.iteritems():
                if len(ckeys) > 1:
                    for ckey in ckeys:
                        conflicted[ckey] = True

        # Create PO messages by fields.
        # Simple holder for the parts of one PO message.
        class Message:
            def __init__ (self):
                self.comments = []
                self.msgctxt = ""
                self.msgid = ""
                self.msgstr = ""

        tdelim = "|" # delimiter for synonyms in msgid and msgstr

        messages = []
        for ckey in [x[1] for x in ordering_links]:
            concept = concepts[ckey]
            msg = Message()
            messages.append(msg)

            # Origin terms into the msgid.
            oterms = concept.term(olang, env)
            msg.msgid = tdelim.join([tft(x.nom.text) for x in oterms])

            # Target terms into the msgstr.
            tterms = concept.term(tlang, env)
            msg.msgstr = tdelim.join([tft(x.nom.text) for x in tterms])

            # Concept key into the msgctxt.
            msg.msgctxt = ckey

            # Auto comments.
            # - full description (possibly only if there is a term conflict)
            # ('conflicted' exists only when condesc is set; the short-circuit
            # on the first operand guards its use here.)
            if not self._options.condesc or ckey in conflicted:
                # Give priority to description in target language.
                descs = concept.desc(tlang, env)
                if not descs:
                     descs = concept.desc(olang, env)
                if descs:
                    # Pick only first description if there are several.
                    msg.comments.append(tfds(descs[0].text))
            # - any declensions in target language
            for tterm in tterms:
                for decl in tterm.decl:
                    grn = gloss.grammar[decl.gr].shortname(tlang, env)[0]
                    msg.comments.append(tfdl(grn.text + [" "] + decl.text))

            # TODO: Implement source reference when lxml.etree can extract them.

        # Format PO header for output.
        fmt_header = ""
        s_title = tft(gloss.title(tlang, env)[0].text)
        fmt_header += (  '# '
                       + p_('header comment in the PO view (title)',
                            'PO view of a Divergloss glossary: %(title)s')
                         % dict(title=s_title)
                       + '\n')
        s_olang = tft(gloss.languages[olang].name(tlang, env)[0].text)
        s_tlang = tft(gloss.languages[tlang].name(tlang, env)[0].text)
        if env:
            s_env = tft(gloss.environments[env].name(tlang, env)[0].text)
            hcmnt = p_('header comment in the PO view (subtitle)',
                       'languages: %(ol)s->%(tl)s, environment: %(env)s') \
                    % dict(ol=s_olang, tl=s_tlang, env=s_env)
        else:
            hcmnt = p_('header comment in the PO view (subtitle)',
                       'languages: %(ol)s->%(tl)s') \
                    % dict(ol=s_olang, tl=s_tlang)
        fmt_header += (  '# '
                       + hcmnt
                       + '\n')
        fmt_header += (  '# '
                       + p_('comment in generated files (warning to user)',
                            '===== AUTOGENERATED FILE, DO NOT EDIT =====')
                       + '\n')
        fmt_header += 'msgid ""\n'
        fmt_header += 'msgstr ""\n'
        fmt_header += '"Project-Id-Version: %s\\n"\n' % gloss.id
        # "%F %R%z" expands to e.g. "2024-01-31 12:45+0100" (GNU extension
        # directives; assumed available on the target platform).
        fmt_header += '"POT-Creation-Date: %s\\n"\n' % time.strftime("%F %R%z")
        fmt_header += '"PO-Revision-Date: %s\\n"\n' % time.strftime("%F %R%z")
        fmt_header += '"Last-Translator: n/a\\n"\n'
        fmt_header += '"Language-Team: n/a\\n"\n'
        fmt_header += '"MIME-Version: 1.0\\n"\n'
        fmt_header += '"Content-Type: text/plain; charset=UTF-8\\n"\n'
        fmt_header += '"Content-Transfer-Encoding: 8bit\\n"\n'

        # Format PO messages for output.
        def poescape (s):
            # Minimal PO escaping: newlines and double quotes.
            return s.replace('\n', '\\n').replace('"', '\\"')

        fmt_messages = []
        for msg in messages:
            fmt_msg = ''
            if msg.comments:
                fmt_msg += '\n'.join(msg.comments) + '\n'
            fmt_msg += 'msgctxt "%s"\n' % poescape(msg.msgctxt)
            fmt_msg += 'msgid "%s"\n' % poescape(msg.msgid)
            fmt_msg += 'msgstr "%s"\n' % poescape(msg.msgstr)
            fmt_messages.append(fmt_msg)

        # Output formatted concepts to requested stream.
        outf = sys.stdout
        if self._options.file:
            outf = open(self._options.file, "w")

        outf.write(fmt_header + "\n")
        outf.write("\n".join(fmt_messages) + "\n")

        if outf is not sys.stdout:
            outf.close()
Beispiel #3
0
    def __call__ (self, gloss):
        """
        Render the glossary as a bilingual HTML dictionary and write it out.

        Builds a two-column table (origin terms -> target terms, with
        collapsible descriptions and declensions), optionally resolving a
        CSS style template and a JavaScript display-control file, then
        assembles header, auxiliary content, body, and footer into the
        output file given in the options.
        """

        # Indentation unit for generated HTML lines.
        self._indent = "  "

        # Resolve languages and environment.
        # error() is assumed to abort; execution does not continue past it.
        olang = self._options.olang
        if olang not in gloss.languages:
            error(p_("error message",
                     "origin language '%(lang)s' not present in the glossary")
                    % dict(lang=olang))
        tlang = self._options.tlang
        if tlang not in gloss.languages:
            error(p_("error message",
                     "target language '%(lang)s' not present in the glossary")
                    % dict(lang=tlang))
        env = self._options.env or gloss.env[0]
        if env is not None and env not in gloss.environments:
            error(p_("error message",
                     "environment '%(env)s' not defined by the glossary")
                  % dict(env=env))

        # Select all concepts which have a term in both langenvs.
        concepts = {}
        for ckey, concept in gloss.concepts.iteritems():
            if concept.term(olang, env) and concept.term(tlang, env):
                concepts[ckey] = concept
        if not concepts:
            warning(p_("warning message",
                       "no concepts found which have terms in both "
                       "the origin and the target language and environment"))

        # Prepare text formatters.
        # refbase maps concept keys to "" so cross-references resolve to
        # in-page anchors (see the dummy <span id=...> anchors below).
        refbase = dict([(ckey, "") for ckey in concepts])
        tfn = TextFormatterPlain(gloss, lang=tlang, env=env)
        tf = TextFormatterHtml(gloss, lang=tlang, env=env, refbase=refbase)
        tfp = TextFormatterHtml(gloss, lang=tlang, env=env, refbase=refbase,
                                wtag="p")

        # Dictionary is presented as follows:
        # - all unique terms in the origin language presented
        # - for each unique origin term, all corresponding unique terms
        #   in the target language presented
        # - for each unique (origin, target) term pair, the descriptions of
        #   all concepts named by it are presented in the target language

        # Collect dict(oterm: dict(tterm: set(ckey)))
        # Collect dict(tterm: dict(gr: set(decl)))
        tdecls = {}
        bidict = {}
        for ckey, concept in concepts.iteritems():
            oterms = concept.term(olang, env)
            tterms = concept.term(tlang, env)
            for oterm in oterms:
                otnom = tfn(oterm.nom.text)
                if otnom not in bidict:
                    bidict[otnom] = {}
                for tterm in tterms:
                    # Target terms.
                    ttnom = tfn(tterm.nom.text)
                    if ttnom not in bidict[otnom]:
                        bidict[otnom][ttnom] = set()
                    bidict[otnom][ttnom].add(ckey)

                    # Declensions.
                    if ttnom not in tdecls:
                        tdecls[ttnom] = {}
                    for decl in tterm.decl:
                        gr = gloss.grammar[decl.gr]
                        grnam = tfn(gr.shortname(tlang, env)[0].text)
                        if grnam not in tdecls[ttnom]:
                            tdecls[ttnom][grnam] = set()
                        ttdecl = tfn(decl.text)
                        tdecls[ttnom][grnam].add(ttdecl)

        # Alphabetically sort origin terms.
        oterms_sorted = bidict.keys()
        langsort(oterms_sorted, olang)

        # Compose the dictionary table.
        accl = LineAccumulator(self._indent, 2)

        accl(stag("table", {"class":"bd-table"}))
        accl()

        # Header.
        accl(stag("tr", {"class":"bd-header"}), 1)
        olname = tfn(gloss.languages[olang].name(tlang, env)[0].text)
        accl(wtext(olname, "th", {"class":"bd-header-ol"}), 2)
        tlname = tfn(gloss.languages[tlang].name(tlang, env)[0].text)
        accl(wtext(tlname, "th", {"class":"bd-header-tl"}), 2)
        accl(etag("tr"), 1)

        # Entries by origin term.
        anchored = {}
        n_entry = 0
        n_entry_by_alpha = 0
        curr_alpha = None
        for oterm in oterms_sorted:
            n_entry += 1
            n_entry_by_alpha += 1

            # Add new alphabetical separator if needed.
            prev_alpha = curr_alpha
            curr_alpha = _term_alpha(oterm)
            if prev_alpha != curr_alpha:
                n_entry_by_alpha = 1
                accl(stag("tr", {"class":"bd-alsep"}), 1)
                accl(wtext(curr_alpha, "td", {"class":"bd-alsep-al",
                                              "colspan":"2"}), 2)
                accl(etag("tr"), 1)

            # Collapse all target terms which have same concepts.
            # Sort them alphabetically within the group,
            # then groups alphabetically by first term in the group.
            tterms_by_ckeygr = {}
            for tterm in bidict[oterm]:
                ckeys = list(bidict[oterm][tterm])
                ckeys.sort()
                ckeygr = tuple(ckeys)
                if ckeygr not in tterms_by_ckeygr:
                    tterms_by_ckeygr[ckeygr] = []
                tterms_by_ckeygr[ckeygr].append(tterm)
            tterms_groups = []
            for ckeys, tterms in tterms_by_ckeygr.iteritems():
                langsort(tterms, tlang)
                tterms_groups.append((tterms[0], tterms, ckeys))
            langsort_tuples(tterms_groups, 0, tlang)
            tterms_ckeys = [x[1:] for x in tterms_groups]

            if n_entry_by_alpha % 2 == 1:
                accl(stag("tr", {"class":"bd-entry-odd"}), 1)
            else:
                #accl(stag("tr", {"class":"bd-entry-even"}), 1)
                #... provide as option; randomly increases VCS deltas.
                accl(stag("tr", {"class":"bd-entry-odd"}), 1)

            # Column with origin term and anchors.
            accl(stag("td", {"class":"bd-oterm"}), 2)

            # Dummy anchors, for cross-references in descriptions to work.
            # Add anchors for all concepts covered by this entry,
            # and remember them, to avoid duplicate anchors on synonyms.
            new_ckeys = []
            for tterms, ckeys in tterms_ckeys:
                for ckey in ckeys:
                    if ckey not in anchored:
                        anchored[ckey] = True
                        new_ckeys.append(ckey)
            accl("".join([stag("span", {"id":x}, close=True)
                          for x in new_ckeys]), 3)

            # Origin term.
            accl(wtext(oterm, "p", {"class":"bd-otline"}), 3)
            accl(etag("td"), 2)

            # Column with target terms.
            accl(stag("td", {"class":"bd-tterms"}), 2)

            n_ttgr = 0
            for tterms, ckeys in tterms_ckeys:
                n_ttgr += 1
                accl(stag("div", {"class":"bd-ttgroup"}), 3)

                # Equip each term with extra info.
                tterms_compgr = []
                for tterm in tterms:
                    # Declensions.
                    lsep_dc = p_("list separator: "
                                 "acceptable variants of the same declension",
                                 ", ")
                    fmt_dcgr = p_("declension group: single declension given "
                                  "by its name and acceptable variants",
                                  "<i>%(dname)s</i> %(dvars)s")
                    lsep_gr = p_("list separator: "
                                 "declension groups",
                                 "; ")
                    tdecl = None
                    if tterm in tdecls:
                        lst = []
                        for gr, decls in tdecls[tterm].iteritems():
                            lst2 = list(decls)
                            langsort(lst2, tlang)
                            lst.append((gr, lsep_dc.join(lst2)))
                        langsort_tuples(lst, 0, tlang)
                        tdecl = lsep_gr.join([fmt_dcgr % dict(dname=x[0],
                                                              dvars=x[1])
                                              for x in lst])
                    # Compose.
                    if tdecl:
                        ttcgr = p_("term with declensions",
                                   "%(term)s (%(decls)s)") \
                                % dict(term=tterm, decls=tdecl)
                    else:
                        ttcgr = tterm
                    tterms_compgr.append(ttcgr)

                # Collect details for each term.
                has_details = False
                # - descriptions
                descstrs = []
                for ckey in ckeys:
                    for desc in concepts[ckey].desc(tlang, env):
                        if tfn(desc.text):
                            descstrs.append(tfp(desc.text, pclass="bd-desc"))
                            has_details = True
                if len(descstrs) > 1:
                    # Number the descriptions by injecting "N. " right after
                    # the first '>' (i.e. inside the opening <p> tag's text).
                    for i in range(len(descstrs)):
                        dhead = "%d. " % (i + 1)
                        descstrs[i] = descstrs[i].replace(">", ">" + dhead, 1)

                # Entry display control (if any details present).
                details_id = "opt_%s_%d" % (oterm.replace(" ", "_"), n_ttgr)
                if has_details:
                    accl(stag("div", {"class":"bd-edctl"}), 4)
                    accl(wtext("[+]", "a",
                               {"class":"bd-edctl",
                                "title":p_("tooltip", "Show details"),
                                "href":"#",
                                "onclick":"return show_hide(this, '%s')"
                                          % details_id}), 5)
                    accl(etag("div"), 4)

                # Line with terms.
                lsep_tt = p_("list separator: synonymous terms",
                             ", ")
                ttstr = lsep_tt.join(tterms_compgr)
                if len(tterms_ckeys) > 1:
                    ttstr = p_("enumerated target term in the dictionary, "
                               "one of the meanings of the original term",
                               "%(num)d. %(term)s") \
                            % dict(num=n_ttgr, term=ttstr)
                accl(wtext(ttstr, "p", {"class":"bd-ttline"}), 4)

                # Optional details.
                if has_details:
                    accl(stag("div", {"id":details_id,
                                      "style":"display: none;"}), 4)

                    for descstr in descstrs:
                        accl(descstr, 5)

                    accl(etag("div"), 4)

                accl(etag("div"), 3)

            accl(etag("td"), 2)
            accl(etag("tr"), 1)
            accl()

        accl(etag("table"))
        accl()

        # Prepare style file path.
        # (stylepath_nr/stylesrc are defined only when a style is requested;
        # they are used further below only under the same condition.)
        stylepath = None
        if self._options.style:
            if self._options.cssfile:
                stylepath = self._options.cssfile
            else:
                stylepath = _replace_ext(os.path.basename(self._options.file),
                                         "css")
            stylepath_nr = os.path.join(os.path.dirname(self._options.file),
                                        stylepath)
            stylesrc = os.path.join(  _src_style_dir, self._options.style
                                    + ".css.in")

        # Prepare JavaScript file path.
        dctlpath = None
        if self._options.jsfile:
            dctlpath = self._options.jsfile
        else:
            dctlpath = _replace_ext(os.path.basename(self._options.file), "js")
        dctlpath_nr = os.path.join(os.path.dirname(self._options.file),
                                   dctlpath)

        # Prepare PHP inclusion file path.
        phpincpath = None
        if self._options.incfile:
            phpincpath = self._options.incfile
        else:
            phpincpath = _replace_ext(os.path.basename(self._options.file),
                                      "inc")
        phpincpath_nr = os.path.join(os.path.dirname(self._options.file),
                                     phpincpath)

        # If style requested, fetch the .in file and resolve placeholders.
        if self._options.style:
            # Parse values given in the command line.
            stodict = dict([x[:2] for x in _styleopt_spec])
            for sopt in self._options.styleopt:
                lst = [x.strip() for x in sopt.split("=", 1)]
                if len(lst) < 2:
                    warning(p_("warning message",
                               "malformed CSS style option '%(opt)s'")
                            % dict(opt=sopt))
                    continue
                name, value = lst
                if name not in stodict:
                    warning(p_("warning message",
                               "unknown CSS style option '%(opt)s'")
                            % dict(opt=sopt))
                    continue
                stodict[name] = value

            # Replace placeholders in the input style sheet.
            # Placeholders have the form @name@.
            raccl = LineAccumulator()
            raccl.read(stylesrc)
            styleaccl = LineAccumulator()
            sto_rx = re.compile("@(\w+)@")
            for line in raccl.lines:
                nline = ""
                lastpos = 0
                for m in sto_rx.finditer(line):
                    nline += line[lastpos:m.span()[0]]
                    lastpos = m.span()[1]
                    soname = m.group(1)
                    sovalue = stodict.get(soname)
                    if soname not in stodict:
                        error(p_("error message",
                                 "unknown CSS style option '%(opt)s' "
                                 "requested by the input style sheet "
                                 "'%(fname)s'")
                              % dict(opt=soname, fname=stylesrc))
                    nline += sovalue
                nline += line[lastpos:]
                styleaccl(nline)

        # Create separate CSS and JS files, or raw inclusion file,
        # or collect everything for direct embedding.
        auxaccl = None
        if not self._options.phpinc and not self._options.allinone:
            shutil.copyfile(_src_dctl_file, dctlpath_nr)
            if self._options.style:
                styleaccl.write(stylepath_nr)
            phpincpath = None # _fmt_header checks this for what to include
        else:
            raccl = LineAccumulator()
            raccl("<script type='text/javascript'>")
            raccl.read(_src_dctl_file)
            raccl("</script>")
            raccl()
            if self._options.style:
                raccl("<style type='text/css'>")
                raccl(styleaccl)
                raccl("</style>")
                raccl()
            if not self._options.allinone:
                raccl.write(phpincpath_nr)
            else:
                auxaccl = raccl

        # Header.
        accl_head = LineAccumulator(self._indent, 0)
        if not self._options.header:
            gname = tfn(gloss.title(tlang, env)[0].text)
            if env:
                ename = tfn(gloss.environments[env].name(tlang, env)[0].text)
                title = p_("top page title",
                           "%(gloss)s (%(env)s)") \
                        % dict(gloss=gname, env=ename)
            else:
                title = gname
            self._fmt_header(accl_head, tlang, title,
                             stylepath, dctlpath, phpincpath)
        else:
            accl_head.read(self._options.header)

        # Footer.
        accl_foot = LineAccumulator(self._indent, 0)
        if not self._options.footer:
            self._fmt_footer(accl_foot)
        else:
            accl_foot.read(self._options.footer)

        # Collect everything and write out the HTML page.
        accl_all = LineAccumulator(self._indent, 0)
        accl_all(accl_head)
        if auxaccl:
            accl_all(auxaccl, 2)
        accl_all(accl)
        accl_all(accl_foot)
        accl_all.write(self._options.file)
Beispiel #4
0
    def __call__ (self, gloss):
        """
        Format the glossary as plain text and write it out.

        Selects all concepts having at least one term in the resolved
        language and environment, orders them lexicographically by their
        first term, and prints each concept's terms (with equivalents in
        other languages) followed by its wrapped descriptions. Output is
        written to the file named in the options, or to standard output.
        """

        # Resolve language and environment.
        # error() is assumed to abort; execution does not continue past it.
        lang = self._options.lang or gloss.lang
        if lang is not None and lang not in gloss.languages:
            error(p_("error message",
                     "language '%(lang)s' does not exist in the glossary")
                    % dict(lang=lang))

        env = self._options.env or gloss.env[0]
        if env is not None and env not in gloss.environments:
            error(p_("error message",
                     "environment '%(env)s' does not exist in the glossary")
                  % dict(env=env))

        # Text formatters for selected language and environment.
        # tfm: terms and inline text; tfd: indented, wrapped descriptions.
        # (Hoisted out of the concept loop; their arguments are invariant,
        # and the per-term formatter was identical to tfm.)
        tfm = TextFormatterPlain(gloss, lang=lang, env=env)
        tfd = TextFormatterPlain(gloss, lang=lang, env=env, indent="    ",
                                 wcol=self._options.wcol)

        # Select all concepts which have a term in this langenv.
        # Collect terms for lexicographical ordering.
        concepts = {}
        ordering_links = []
        for ckey, concept in gloss.concepts.iteritems():
            terms = concept.term(lang, env)
            if terms:
                concepts[ckey] = concept
                # Use first of the synonymous terms for ordering.
                ordering_links.append((tfm(terms[0].nom.text).lower(), ckey))

        langsort_tuples(ordering_links, 0, lang)

        # Format glossary metadata (title with underline) for output.
        fmt_title = tfm(gloss.title(lang, env)[0].text)
        if env is not None:
            fmt_envname = tfm(gloss.environments[env].name(lang, env)[0].text)
            fmt_title = "%s (%s)" % (fmt_title, fmt_envname)
        fmt_header = fmt_title + "\n" + "-" * len(fmt_title) + "\n"

        # Format concepts for output.
        fmt_concepts = []
        for concept in [concepts[x[1]] for x in ordering_links]:
            fmtlist = []

            # Terms for this langenv.
            terms = concept.term(lang, env)
            fmtlist.append("  ")
            fmtlist.append(", ".join([tfm(x.nom.text) for x in terms]))
            # Also terms in other languages, but the same environment.
            fmt_ots = []
            for olang in [x for x in gloss.languages if x != lang]:
                oterms = concept.term(olang, env)
                lname = gloss.languages[olang].shortname(lang, env)
                if oterms and lname:
                    l = tfm(lname[0].text)
                    ts = ", ".join([tfm(x.nom.text) for x in oterms])
                    fmt_ots.append("%s /%s/" % (l, ts))
            if fmt_ots:
                fmtlist.append(" (%s)" % ("; ".join(fmt_ots)))

            # All descriptions for this langenv.
            descs = concept.desc(lang, env)
            if descs:
                fmtlist.append("\n")
                fmt_ds = []
                if len(descs) == 1:
                    fmt_ds.append(tfd(descs[0].text))
                else:
                    # Enumerate descriptions when there are several.
                    for i in range(len(descs)):
                        fmt_ds.append(tfd(descs[i].text,
                                          prefix=("%d. " % (i + 1))))
                fmtlist.append("\n\n".join(fmt_ds))

            # Done formatting concept.
            fmt_concepts.append("".join(fmtlist))

        # Output formatted concepts to requested stream.
        outf = sys.stdout
        if self._options.file:
            outf = open(self._options.file, "w")

        outf.write(fmt_header + "\n")
        outf.write("\n\n".join(fmt_concepts) + "\n\n")

        if outf is not sys.stdout:
            outf.close()