def _res_embsel_best_text (gloss, ntext, env):

    text = copy.copy(ntext)
    text[:] = []
    for seg in ntext:
        if isinstance(seg, Text):
            text.append(_res_embsel_best_text(gloss, seg, env))
        elif isinstance(seg, dict):
            # Try first direct match for environment.
            if env in seg:
                text.append(seg[env])
            else:
                # Try a close environment.
                found_close = False
                if env in gloss.environments:
                    for cenv in gloss.environments[env].closeto:
                        if cenv in seg:
                            text.append(seg[cenv])
                            found_close = True
                            break
                # Take a best shot.
                if not found_close:
                    if env not in gloss.env:
                        warning(p_("warning message",
                                   "no resolution for expected environment "
                                   "'%(env)s' in embedded selector '%(esel)s'")
                                % dict(env=env, esel=seg.unparsed))
                    # Pick at random.
                    text.append(random.choice(seg.values()))
        else:
            text.append(seg)

    return text
def _res_embsel_parse_one (seg, denvs):

    ntext = Text()
    envs = set()
    p1 = seg.find("~")
    p2 = -1
    while p1 >= 0:
        head = seg[p2+1:p1]
        if head:
            ntext.append(head)

        p2 = seg.find("~", p1 + 1)
        if p2 < 0:
            warning(p_("warning message",
                       "unterminated embedded selector '%(esel)s'")
                    % {"esel":seg})
            p2 = p1 - 1
            break

        class DictWProps (dict): pass
        envsegs = DictWProps()
        locenvs = set()
        for eseg in seg[p1+1:p2].split("|"):
            pc = eseg.find(":")
            if pc >= 0:
                cenvs = eseg[:pc].split()
                cseg = eseg[pc+1:]
            else:
                cenvs = denvs
                cseg = eseg

            repenvs = locenvs.intersection(cenvs)
            if repenvs:
                fmtes = " ".join([str(x) for x in list(repenvs)])
                warning(p_("warning message",
                           "segment '%(eseg)s' in embedded selector "
                           "'%(esel)s' repeats environments: %(envs)s")
                        % {"esel":seg, "eseg":eseg, "envs":fmtes})
            locenvs.update(cenvs)

            for cenv in cenvs:
                envsegs[cenv] = cseg

        # Keep the unparsed embedded selector string as an attribute,
        # needed later for error reporting.
        envsegs.unparsed = seg

        ntext.append(envsegs)
        envs.update(locenvs)

        p1 = seg.find("~", p2 + 1)

    tail = seg[p2+1:]
    if tail:
        ntext.append(tail)

    return ntext, envs
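# A minimal sketch of the embedded-selector syntax handled by the two
# functions above; the string and the environment keys ("kde", "gnome")
# are assumed examples, not taken from any real glossary:
#
#     ntext, envs = _res_embsel_parse_one(
#         "Open the ~kde:Konsole|gnome:Terminal~ window.", ["kde"])
#
# Parsing splits the string on "~...~" selectors: the head "Open the " and
# the tail " window." stay plain strings, while the selector itself becomes
# a dict-like segment {"kde": "Konsole", "gnome": "Terminal"}, and envs
# collects {"kde", "gnome"}. Alternatives without an explicit "env:" prefix
# are filed under the default environments given in denvs. Resolving such a
# parsed text with _res_embsel_best_text(gloss, ntext, "gnome") would then
# pick "Terminal"; an unknown environment falls back to a close environment
# from gloss.environments, or else to a randomly chosen alternative.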
def __call__(self, gloss):

    # Resolve languages and environments.
    olang = self._options.olang
    if olang not in gloss.languages:
        error(p_("error message",
                 "origin language '%(lang)s' not present in the glossary")
              % dict(lang=olang))
    tlang = self._options.tlang
    if tlang not in gloss.languages:
        error(p_("error message",
                 "target language '%(lang)s' not present in the glossary")
              % dict(lang=tlang))
    env = self._options.env or gloss.env[0]
    if env and env not in gloss.environments:
        error(p_("error message",
                 "environment '%(env)s' not defined by the glossary")
              % dict(env=env))
    benv = self._options.benv
    if benv and benv not in gloss.environments:
        error(p_("error message",
                 "environment '%(env)s' not defined by the glossary")
              % dict(env=benv))
    rulefile = self._options.file

    # Formatter for resolving glossary into plain text.
    tft = TextFormatterPlain(gloss, lang=tlang, env=env)

    tdelim = "|"  # to be able to send terms to regex too

    def format_terms(concept, env=env):
        oterms = concept.term(olang, env)
        tterms = concept.term(tlang, env)
        if not oterms or not tterms:
            return None, None
        oterms = [tft(x.nom.text) for x in oterms]
        langsort(oterms, olang)
        otermsall = tdelim.join(oterms)
        tterms = [tft(x.nom.text) for x in tterms]
        langsort(tterms, tlang)
        ttermsall = tdelim.join(tterms)
        return otermsall, ttermsall

    # From concepts which have a term in both langenvs,
    # assemble the data needed to construct rules.
    # Also collect keys of concepts which are shared with
    # the base environment *from the viewpoint of rules*.
    concepts_data = {}
    concepts_shared = set()
    for ckey, concept in gloss.concepts.iteritems():
        oterms, tterms = format_terms(concept)
        if oterms and tterms:
            concepts_data[ckey] = (oterms, tterms)
            if benv:
                # A concept is shared if both origin and target
                # terminology are the same in the base environment.
                boterms, btterms = format_terms(concept, benv)
                if oterms == boterms and tterms == btterms:
                    concepts_shared.add(ckey)

    if not concepts_data:
        warning(p_("warning message",
                   "no concepts found that have terms in both "
                   "the requested origin and target language"))

    # Parse rules file.
    rules, rmap, plines, elines = [], {}, [], []
    if os.path.isfile(rulefile):
        rules, rmap, plines, elines = self._load_rules(rulefile)

    # Flag all existing rules.
    for rkey, rule in rmap.iteritems():
        if rkey not in concepts_data:
            rule.set_flag("obsolete")
            continue
        oterms, tterms = concepts_data[rkey]
        if benv and rkey in concepts_shared:
            note = None
            if oterms != rule.oterms or tterms != rule.tterms:
                note = "%s = %s" % (oterms, tterms)
            rule.set_flag("merge", note)
            continue
        if oterms != rule.oterms or tterms != rule.tterms:
            note = "%s = %s" % (oterms, tterms)
            rule.set_flag("fuzzy", note)
            continue
        if not rule.has_flag("new"):
            rule.set_flag("")

    # Add new rules, in lexicographical order by keys.
    ckeys = concepts_data.keys()
    ckeys.sort()
    last_ins_pos = -1
    for ckey in ckeys:
        if ckey in rmap:
            continue
        if ckey in concepts_shared:
            continue

        nrule = self._Rule()
        nrule.ckey = ckey
        nrule.oterms, nrule.tterms = concepts_data[ckey]
        nrule.disabled = True

        # Add all fields for establishing ordering;
        # some will get their real values on sync.
        if tdelim not in nrule.oterms:
            topmatch = "{\\b%s}" % nrule.oterms
        else:
            topmatch = "{\\b(%s)}" % nrule.oterms
        if nrule.oterms.islower():
            topmatch += "i"
        nrule.lines.append(topmatch)
        nrule.lines.append('id=""')
        nrule.lines.append('hint=""')
        if tdelim not in nrule.tterms:
            valmatch = 'valid msgstr="\\b%s"' % nrule.tterms
        else:
            valmatch = 'valid msgstr="\\b(%s)"' % nrule.tterms
        nrule.lines.append(valmatch)
        nrule.lines.append("disabled")
        nrule.set_flag("new")

        inserted = False
        for i in range(last_ins_pos + 1, len(rules)):
            if ckey < rules[i].ckey:
                last_ins_pos = i
                rules.insert(i, nrule)
                inserted = True
                break
        if not inserted:
            last_ins_pos = len(rules)
            rules.append(nrule)
        rmap[ckey] = nrule

    # Write rules back.
    ofl = codecs.open(rulefile, "w", "UTF-8")
    ofl.writelines([x + "\n" for x in plines])
    for rule in rules:
        ofl.writelines(rule.format_lines())
    ofl.writelines([x + "\n" for x in elines])
    ofl.close()
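# For illustration only: with made-up terms, a newly added rule for a
# lower-case origin term "folder" and two target synonyms "fascikla" and
# "omotnica" would get these raw lines (flag comments and final layout are
# produced by self._Rule and format_lines(), which are defined elsewhere):
#
#     {\bfolder}i
#     id=""
#     hint=""
#     valid msgstr="\b(fascikla|omotnica)"
#     disabled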
def __call__ (self, gloss):

    # Resolve languages and environment.
    olang = self._options.olang
    if olang not in gloss.languages:
        error(p_("error message",
                 "origin language '%(lang)s' not present in the glossary")
              % dict(lang=olang))
    tlang = self._options.tlang
    if tlang not in gloss.languages:
        error(p_("error message",
                 "target language '%(lang)s' not present in the glossary")
              % dict(lang=tlang))
    env = self._options.env or gloss.env[0]
    if env is not None and env not in gloss.environments:
        error(p_("error message",
                 "environment '%(env)s' not defined by the glossary")
              % dict(env=env))

    # Formatters for resolving glossary into plain text.
    tft = TextFormatterPlain(gloss, lang=tlang, env=env)
    s_desc = p_("message comment in the PO view: "
                "short label preceding a concept description",
                "desc: ")
    cpref = "# "
    tfds = TextFormatterPlain(gloss, lang=tlang, env=env,
                              first_indent=(cpref + s_desc),
                              indent=(cpref + " " * len(s_desc)),
                              wcol=79)
    s_decl = p_("message comment in the PO view: "
                "short label preceding a declination",
                "decl: ")
    tfdl = TextFormatterPlain(gloss, lang=tlang, env=env,
                              prefix=(cpref + s_decl))

    # Select all concepts which have a term in both langenvs.
    # Collect terms from the origin language for lexicographical ordering.
    concepts = {}
    ordering_links = []
    for ckey, concept in gloss.concepts.iteritems():
        oterms = concept.term(olang, env)
        tterms = concept.term(tlang, env)
        if oterms and tterms:
            concepts[ckey] = concept
            # Use first of the synonymous origin terms for ordering.
            # Must format it to plain text beforehand.
            ordering_links.append((tft(oterms[0].nom.text).lower(), ckey))

    if not concepts:
        warning(p_("warning message",
                   "no concepts found for PO view that have terms in both "
                   "the requested origin and target language"))

    langsort_tuples(ordering_links, 0, olang)

    if self._options.condesc:
        # Collect keys of all concepts which share a term with another
        # concept, in either of the languages.
        all_ckeys_by_term = {}
        for ckey, concept in concepts.iteritems():
            aterms = concept.term(olang, env) + concept.term(tlang, env)
            for term in aterms:
                nomstr = tft(term.nom.text)
                if nomstr not in all_ckeys_by_term:
                    all_ckeys_by_term[nomstr] = []
                all_ckeys_by_term[nomstr].append(ckey)
        conflicted = {}
        for nomstr, ckeys in all_ckeys_by_term.iteritems():
            if len(ckeys) > 1:
                for ckey in ckeys:
                    conflicted[ckey] = True

    # Create PO messages by fields.
    class Message:
        def __init__ (self):
            self.comments = []
            self.msgctxt = ""
            self.msgid = ""
            self.msgstr = ""

    tdelim = "|"  # delimiter for synonyms in msgid and msgstr
    messages = []
    for ckey in [x[1] for x in ordering_links]:
        concept = concepts[ckey]
        msg = Message()
        messages.append(msg)

        # Origin terms into the msgid.
        oterms = concept.term(olang, env)
        msg.msgid = tdelim.join([tft(x.nom.text) for x in oterms])

        # Target terms into the msgstr.
        tterms = concept.term(tlang, env)
        msg.msgstr = tdelim.join([tft(x.nom.text) for x in tterms])

        # Concept key into the msgctxt.
        msg.msgctxt = ckey

        # Auto comments.
        # - full description (possibly only if there is a term conflict)
        if not self._options.condesc or ckey in conflicted:
            # Give priority to description in target language.
            descs = concept.desc(tlang, env)
            if not descs:
                descs = concept.desc(olang, env)
            if descs:
                # Pick only the first description if there are several.
                msg.comments.append(tfds(descs[0].text))
        # - any declensions in target language
        for tterm in tterms:
            for decl in tterm.decl:
                grn = gloss.grammar[decl.gr].shortname(tlang, env)[0]
                msg.comments.append(tfdl(grn.text + [" "] + decl.text))
        # TODO: Implement source reference when lxml.etree can extract them.

    # Format PO header for output.
    fmt_header = ""
    s_title = tft(gloss.title(tlang, env)[0].text)
    fmt_header += ('# '
                   + p_('header comment in the PO view (title)',
                        'PO view of a Divergloss glossary: %(title)s')
                     % dict(title=s_title)
                   + '\n')
    s_olang = tft(gloss.languages[olang].name(tlang, env)[0].text)
    s_tlang = tft(gloss.languages[tlang].name(tlang, env)[0].text)
    if env:
        s_env = tft(gloss.environments[env].name(tlang, env)[0].text)
        hcmnt = p_('header comment in the PO view (subtitle)',
                   'languages: %(ol)s->%(tl)s, environment: %(env)s') \
                % dict(ol=s_olang, tl=s_tlang, env=s_env)
    else:
        hcmnt = p_('header comment in the PO view (subtitle)',
                   'languages: %(ol)s->%(tl)s') \
                % dict(ol=s_olang, tl=s_tlang)
    fmt_header += ('# ' + hcmnt + '\n')
    fmt_header += ('# '
                   + p_('comment in generated files (warning to user)',
                        '===== AUTOGENERATED FILE, DO NOT EDIT =====')
                   + '\n')
    fmt_header += 'msgid ""\n'
    fmt_header += 'msgstr ""\n'
    fmt_header += '"Project-Id-Version: %s\\n"\n' % gloss.id
    fmt_header += '"POT-Creation-Date: %s\\n"\n' % time.strftime("%F %R%z")
    fmt_header += '"PO-Revision-Date: %s\\n"\n' % time.strftime("%F %R%z")
    fmt_header += '"Last-Translator: n/a\\n"\n'
    fmt_header += '"Language-Team: n/a\\n"\n'
    fmt_header += '"MIME-Version: 1.0\\n"\n'
    fmt_header += '"Content-Type: text/plain; charset=UTF-8\\n"\n'
    fmt_header += '"Content-Transfer-Encoding: 8bit\\n"\n'

    # Format PO messages for output.
    def poescape (s):
        return s.replace('\n', '\\n').replace('"', '\\"')

    fmt_messages = []
    for msg in messages:
        fmt_msg = ''
        if msg.comments:
            fmt_msg += '\n'.join(msg.comments) + '\n'
        fmt_msg += 'msgctxt "%s"\n' % poescape(msg.msgctxt)
        fmt_msg += 'msgid "%s"\n' % poescape(msg.msgid)
        fmt_msg += 'msgstr "%s"\n' % poescape(msg.msgstr)
        fmt_messages.append(fmt_msg)

    # Output formatted concepts to requested stream.
    outf = sys.stdout
    if self._options.file:
        outf = open(self._options.file, "w")

    outf.write(fmt_header + "\n")
    outf.write("\n".join(fmt_messages) + "\n")

    if outf is not sys.stdout:
        outf.close()
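# A sketch of one entry in the resulting PO view; the concept key, terms
# and description are invented for illustration:
#
#     # desc: The horizontal strip at the top of a window.
#     msgctxt "titlebar"
#     msgid "title bar"
#     msgstr "naslovna traka"
#
# Synonymous terms would be joined with "|" in msgid/msgstr, and any
# declensions would add further "# decl: ..." comment lines.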
def __call__ (self, gloss):

    self._indent = " "

    # Resolve languages and environment.
    olang = self._options.olang
    if olang not in gloss.languages:
        error(p_("error message",
                 "origin language '%(lang)s' not present in the glossary")
              % dict(lang=olang))
    tlang = self._options.tlang
    if tlang not in gloss.languages:
        error(p_("error message",
                 "target language '%(lang)s' not present in the glossary")
              % dict(lang=tlang))
    env = self._options.env or gloss.env[0]
    if env is not None and env not in gloss.environments:
        error(p_("error message",
                 "environment '%(env)s' not defined by the glossary")
              % dict(env=env))

    # Select all concepts which have a term in both langenvs.
    concepts = {}
    for ckey, concept in gloss.concepts.iteritems():
        if concept.term(olang, env) and concept.term(tlang, env):
            concepts[ckey] = concept
    if not concepts:
        warning(p_("warning message",
                   "no concepts found which have terms in both "
                   "the origin and the target language and environment"))

    # Prepare text formatters.
    refbase = dict([(ckey, "") for ckey in concepts])
    tfn = TextFormatterPlain(gloss, lang=tlang, env=env)
    tf = TextFormatterHtml(gloss, lang=tlang, env=env, refbase=refbase)
    tfp = TextFormatterHtml(gloss, lang=tlang, env=env, refbase=refbase,
                            wtag="p")

    # The dictionary is presented as follows:
    # - all unique terms in the origin language are presented
    # - for each unique origin term, all corresponding unique terms
    #   in the target language are presented
    # - for each unique (origin, target) term pair, the descriptions of
    #   all concepts named by it are presented in the target language

    # Collect dict(oterm: dict(tterm: set(ckey)))
    # Collect dict(tterm: dict(gr: set(decl)))
    tdecls = {}
    bidict = {}
    for ckey, concept in concepts.iteritems():
        oterms = concept.term(olang, env)
        tterms = concept.term(tlang, env)
        for oterm in oterms:
            otnom = tfn(oterm.nom.text)
            if otnom not in bidict:
                bidict[otnom] = {}
            for tterm in tterms:
                # Target terms.
                ttnom = tfn(tterm.nom.text)
                if ttnom not in bidict[otnom]:
                    bidict[otnom][ttnom] = set()
                bidict[otnom][ttnom].add(ckey)
                # Declensions.
                if ttnom not in tdecls:
                    tdecls[ttnom] = {}
                for decl in tterm.decl:
                    gr = gloss.grammar[decl.gr]
                    grnam = tfn(gr.shortname(tlang, env)[0].text)
                    if grnam not in tdecls[ttnom]:
                        tdecls[ttnom][grnam] = set()
                    ttdecl = tfn(decl.text)
                    tdecls[ttnom][grnam].add(ttdecl)

    # Alphabetically sort origin terms.
    oterms_sorted = bidict.keys()
    langsort(oterms_sorted, olang)

    # Compose the dictionary table.
    accl = LineAccumulator(self._indent, 2)
    accl(stag("table", {"class":"bd-table"}))
    accl()

    # Header.
    accl(stag("tr", {"class":"bd-header"}), 1)
    olname = tfn(gloss.languages[olang].name(tlang, env)[0].text)
    accl(wtext(olname, "th", {"class":"bd-header-ol"}), 2)
    tlname = tfn(gloss.languages[tlang].name(tlang, env)[0].text)
    accl(wtext(tlname, "th", {"class":"bd-header-tl"}), 2)
    accl(etag("tr"), 1)

    # Entries by origin term.
    anchored = {}
    n_entry = 0
    n_entry_by_alpha = 0
    curr_alpha = None
    for oterm in oterms_sorted:
        n_entry += 1
        n_entry_by_alpha += 1

        # Add new alphabetical separator if needed.
        prev_alpha = curr_alpha
        curr_alpha = _term_alpha(oterm)
        if prev_alpha != curr_alpha:
            n_entry_by_alpha = 1
            accl(stag("tr", {"class":"bd-alsep"}), 1)
            accl(wtext(curr_alpha, "td", {"class":"bd-alsep-al",
                                          "colspan":"2"}), 2)
            accl(etag("tr"), 1)

        # Collapse all target terms which have the same concepts.
        # Sort them alphabetically within the group,
        # then the groups alphabetically by the first term in the group.
        tterms_by_ckeygr = {}
        for tterm in bidict[oterm]:
            ckeys = list(bidict[oterm][tterm])
            ckeys.sort()
            ckeygr = tuple(ckeys)
            if ckeygr not in tterms_by_ckeygr:
                tterms_by_ckeygr[ckeygr] = []
            tterms_by_ckeygr[ckeygr].append(tterm)
        tterms_groups = []
        for ckeys, tterms in tterms_by_ckeygr.iteritems():
            langsort(tterms, tlang)
            tterms_groups.append((tterms[0], tterms, ckeys))
        langsort_tuples(tterms_groups, 0, tlang)
        tterms_ckeys = [x[1:] for x in tterms_groups]

        if n_entry_by_alpha % 2 == 1:
            accl(stag("tr", {"class":"bd-entry-odd"}), 1)
        else:
            #accl(stag("tr", {"class":"bd-entry-even"}), 1)
            #... provide as option; randomly increases VCS deltas.
            accl(stag("tr", {"class":"bd-entry-odd"}), 1)

        # Column with origin term and anchors.
        accl(stag("td", {"class":"bd-oterm"}), 2)

        # Dummy anchors, for cross-references in descriptions to work.
        # Add anchors for all concepts covered by this entry,
        # and remember them, to avoid duplicate anchors on synonyms.
        new_ckeys = []
        for tterms, ckeys in tterms_ckeys:
            for ckey in ckeys:
                if ckey not in anchored:
                    anchored[ckey] = True
                    new_ckeys.append(ckey)
        accl("".join([stag("span", {"id":x}, close=True)
                      for x in new_ckeys]), 3)

        # Origin term.
        accl(wtext(oterm, "p", {"class":"bd-otline"}), 3)
        accl(etag("td"), 2)

        # Column with target terms.
        accl(stag("td", {"class":"bd-tterms"}), 2)

        n_ttgr = 0
        for tterms, ckeys in tterms_ckeys:
            n_ttgr += 1
            accl(stag("div", {"class":"bd-ttgroup"}), 3)

            # Equip each term with extra info.
            tterms_compgr = []
            for tterm in tterms:
                # Declensions.
                lsep_dc = p_("list separator: "
                             "acceptable variants of the same declension",
                             ", ")
                fmt_dcgr = p_("declension group: single declension given "
                              "by its name and acceptable variants",
                              "<i>%(dname)s</i> %(dvars)s")
                lsep_gr = p_("list separator: "
                             "declension groups",
                             "; ")
                tdecl = None
                if tterm in tdecls:
                    lst = []
                    for gr, decls in tdecls[tterm].iteritems():
                        lst2 = list(decls)
                        langsort(lst2, tlang)
                        lst.append((gr, lsep_dc.join(lst2)))
                    langsort_tuples(lst, 0, tlang)
                    tdecl = lsep_gr.join([fmt_dcgr % dict(dname=x[0],
                                                          dvars=x[1])
                                          for x in lst])
                # Compose.
                if tdecl:
                    ttcgr = p_("term with declensions",
                               "%(term)s (%(decls)s)") \
                            % dict(term=tterm, decls=tdecl)
                else:
                    ttcgr = tterm
                tterms_compgr.append(ttcgr)

            # Collect details for each term.
            has_details = False
            # - descriptions
            descstrs = []
            for ckey in ckeys:
                for desc in concepts[ckey].desc(tlang, env):
                    if tfn(desc.text):
                        descstrs.append(tfp(desc.text, pclass="bd-desc"))
                        has_details = True
            if len(descstrs) > 1:
                for i in range(len(descstrs)):
                    dhead = "%d. " % (i + 1)
                    descstrs[i] = descstrs[i].replace(">", ">" + dhead, 1)

            # Entry display control (if any details present).
            details_id = "opt_%s_%d" % (oterm.replace(" ", "_"), n_ttgr)
            if has_details:
                accl(stag("div", {"class":"bd-edctl"}), 4)
                accl(wtext("[+]", "a",
                           {"class":"bd-edctl",
                            "title":p_("tooltip", "Show details"),
                            "href":"#",
                            "onclick":"return show_hide(this, '%s')"
                                      % details_id}), 5)
                accl(etag("div"), 4)

            # Line with terms.
            lsep_tt = p_("list separator: synonymous terms", ", ")
            ttstr = lsep_tt.join(tterms_compgr)
            if len(tterms_ckeys) > 1:
                ttstr = p_("enumerated target term in the dictionary, "
                           "one of the meanings of the original term",
                           "%(num)d. %(term)s") \
                        % dict(num=n_ttgr, term=ttstr)
            accl(wtext(ttstr, "p", {"class":"bd-ttline"}), 4)

            # Optional details.
            if has_details:
                accl(stag("div", {"id":details_id,
                                  "style":"display: none;"}), 4)
                for descstr in descstrs:
                    accl(descstr, 5)
                accl(etag("div"), 4)

            accl(etag("div"), 3)

        accl(etag("td"), 2)
        accl(etag("tr"), 1)
        accl()

    accl(etag("table"))
    accl()

    # Prepare style file path.
    stylepath = None
    if self._options.style:
        if self._options.cssfile:
            stylepath = self._options.cssfile
        else:
            stylepath = _replace_ext(os.path.basename(self._options.file),
                                     "css")
        stylepath_nr = os.path.join(os.path.dirname(self._options.file),
                                    stylepath)
        stylesrc = os.path.join(_src_style_dir,
                                self._options.style + ".css.in")

    # Prepare JavaScript file path.
    dctlpath = None
    if self._options.jsfile:
        dctlpath = self._options.jsfile
    else:
        dctlpath = _replace_ext(os.path.basename(self._options.file), "js")
    dctlpath_nr = os.path.join(os.path.dirname(self._options.file),
                               dctlpath)

    # Prepare PHP inclusion file path.
    phpincpath = None
    if self._options.incfile:
        phpincpath = self._options.incfile
    else:
        phpincpath = _replace_ext(os.path.basename(self._options.file),
                                  "inc")
    phpincpath_nr = os.path.join(os.path.dirname(self._options.file),
                                 phpincpath)

    # If style requested, fetch the .in file and resolve placeholders.
    if self._options.style:
        # Parse values given in the command line.
        stodict = dict([x[:2] for x in _styleopt_spec])
        for sopt in self._options.styleopt:
            lst = [x.strip() for x in sopt.split("=", 1)]
            if len(lst) < 2:
                warning(p_("warning message",
                           "malformed CSS style option '%(opt)s'")
                        % dict(opt=sopt))
                continue
            name, value = lst
            if name not in stodict:
                warning(p_("warning message",
                           "unknown CSS style option '%(opt)s'")
                        % dict(opt=sopt))
                continue
            stodict[name] = value

        # Replace placeholders in the input style sheet.
        raccl = LineAccumulator()
        raccl.read(stylesrc)
        styleaccl = LineAccumulator()
        sto_rx = re.compile(r"@(\w+)@")
        for line in raccl.lines:
            nline = ""
            lastpos = 0
            for m in sto_rx.finditer(line):
                nline += line[lastpos:m.span()[0]]
                lastpos = m.span()[1]
                soname = m.group(1)
                sovalue = stodict.get(soname)
                if soname not in stodict:
                    error(p_("error message",
                             "unknown CSS style option '%(opt)s' "
                             "requested by the input style sheet "
                             "'%(fname)s'")
                          % dict(opt=soname, fname=stylesrc))
                nline += sovalue
            nline += line[lastpos:]
            styleaccl(nline)

    # Create separate CSS and JS files, or raw inclusion file,
    # or collect everything for direct embedding.
    auxaccl = None
    if not self._options.phpinc and not self._options.allinone:
        shutil.copyfile(_src_dctl_file, dctlpath_nr)
        if self._options.style:
            styleaccl.write(stylepath_nr)
        phpincpath = None  # _fmt_header checks this for what to include
    else:
        raccl = LineAccumulator()
        raccl("<script type='text/javascript'>")
        raccl.read(_src_dctl_file)
        raccl("</script>")
        raccl()
        if self._options.style:
            raccl("<style type='text/css'>")
            raccl(styleaccl)
            raccl("</style>")
            raccl()
        if not self._options.allinone:
            raccl.write(phpincpath_nr)
        else:
            auxaccl = raccl

    # Header.
    accl_head = LineAccumulator(self._indent, 0)
    if not self._options.header:
        gname = tfn(gloss.title(tlang, env)[0].text)
        if env:
            ename = tfn(gloss.environments[env].name(tlang, env)[0].text)
            title = p_("top page title",
                       "%(gloss)s (%(env)s)") \
                    % dict(gloss=gname, env=ename)
        else:
            title = gname
        self._fmt_header(accl_head, tlang, title,
                         stylepath, dctlpath, phpincpath)
    else:
        accl_head.read(self._options.header)

    # Footer.
    accl_foot = LineAccumulator(self._indent, 0)
    if not self._options.footer:
        self._fmt_footer(accl_foot)
    else:
        accl_foot.read(self._options.footer)

    # Collect everything and write out the HTML page.
    accl_all = LineAccumulator(self._indent, 0)
    accl_all(accl_head)
    if auxaccl:
        accl_all(auxaccl, 2)
    accl_all(accl)
    accl_all(accl_foot)
    accl_all.write(self._options.file)