def remove_redundancy(self):
    """Merge paragraphs that occur in more than one parsed markdown file.

    Walks every file object in ``self.mdf`` and groups its paragraphs by
    message key.  Each group is represented by one new ``MdPara`` whose line
    numbers are ``'<md filename>:<line>'`` strings, so the origin of every
    occurrence is kept.  The merged paragraphs are then stored in
    ``self.mdfd`` (md filename -> ``MdFile``), each one filed under the
    file and line of its *first* recorded occurrence.

    Side effects: ``self.mdf`` is deleted, ``self.mdfd`` is rebuilt from
    scratch and ``self.mdp`` is left pointing at the paragraph map of the
    last file visited.
    """
    # message key -> merged MdPara
    merged = {}

    for fobj in self.mdf:
        mdfn = fobj.md_filename()
        # Kept as an instance attribute: the original implementation
        # exposed the current paragraph map this way.
        self.mdp = fobj.md_paragraph()
        for msg, pobj in self.mdp.items():
            lnlist = pobj.line_number_list()
            tmdp = merged.get(msg)
            if tmdp is None:
                # first time this message is seen: clone type and text
                tmdp = MdPara()
                tmdp.set_type(pobj.para_type(), pobj.para_msg())
                merged[msg] = tmdp
            for ln in lnlist:
                tmdp.add_line_number('{}:{}'.format(mdfn, ln))

    del self.mdf

    self.mdfd = {}
    for tmdp in merged.values():
        # Split on the LAST colon only, so a file name that itself
        # contains ':' (e.g. a drive letter) is not cut apart.
        fn, _, lineno = tmdp.line_number_list()[0].rpartition(':')
        ln = int(lineno)
        fobj = self.mdfd.get(fn)
        if fobj is None:
            fobj = MdFile(fn)
            self.mdfd[fn] = fobj
        fobj.set_md_paragraph(ln, tmdp)
def parse_md(self, filename):
    """Parse one markdown file into an ``MdFile`` of paragraphs.

    The file is read line by line and every line is classified with
    ``self.mdparser.parse``.  Lines classified as ``'blank'`` are skipped.
    Code blocks, inline HTML blocks and tables absorb the following lines
    up to and including their closing line, and a header underline is
    merged with the preceding common paragraph into a single ``'header'``
    paragraph.  In a second pass, paragraphs with identical text are
    folded into one entry that records every line number.

    Args:
        filename: path of the markdown file, resolved against the current
            working directory.

    Returns:
        The ``MdFile`` created for ``filename``, with paragraphs keyed by
        their text.
    """
    with open(os.path.join(os.getcwd(), filename), 'r') as mdfile:
        mdobj = MdFile(filename)
        print('Extracting messages from {} ... '.format(filename))

        mdp_list = []
        line = 0

        def absorb_until(para, is_last):
            """Append following lines to *para*, up to and including the
            first line accepted by *is_last* (or EOF); return the number
            of lines consumed."""
            consumed = 0
            while True:
                nxt = mdfile.readline()
                if not nxt:
                    break
                consumed += 1
                para.set_type(para.para_type(), para.para_msg() + nxt)
                if is_last(nxt):
                    break
            return consumed

        # parsing routine per one markdown file
        while True:
            mdstr = mdfile.readline()
            if not mdstr:
                break
            line += 1

            mdptype = self.mdparser.parse(mdstr)
            if mdptype == 'blank':
                continue

            mdp = MdPara()
            mdp.set_type(mdptype, mdstr)
            cline = line

            # A header underline turns the previous common paragraph into
            # a two-line header.  Guard against an underline that is the
            # very first paragraph (nothing to merge with).
            if mdp.is_headerline() and mdp_list:
                bmdp = mdp_list[-1]
                if bmdp.is_common():
                    cline = bmdp.line_number_list()[-1]
                    del mdp_list[-1]
                    mdp.set_type('header',
                                 bmdp.para_msg() + mdp.para_msg())

            if mdp.is_codeblock():
                # code block: read up to the closing fence
                line += absorb_until(
                    mdp, lambda s: self.mdparser.parse(s) == 'codeblock')
            elif mdp.is_tagopen():
                # inline HTML: read up to the matching closing tag
                opened = re.search(r'<(\S+)>', mdp.para_msg())
                # If the tag name cannot be extracted the paragraph is
                # kept as a single line instead of crashing on None.
                if opened is not None:
                    tagcls = '</{}>'.format(opened.group(1))
                    line += absorb_until(
                        mdp,
                        lambda s: (self.mdparser.parse(s) == 'tagclose'
                                   and s.rstrip('\n') == tagcls))
            elif mdp.is_tableopen():
                line += absorb_until(
                    mdp, lambda s: self.mdparser.parse(s) == 'tableclose')

            mdp.add_line_number(cline)
            mdp_list.append(mdp)

    # The file is closed here; fold duplicated paragraphs together.
    for mdp in mdp_list:
        msg = mdp.para_msg()
        # blank line (ignore)
        if msg == '\n':
            continue
        if mdobj.exist_md_paragraph(msg):
            tmdp = mdobj.get_md_paragraph(msg)
            tmdp.add_line_number(mdp.line_number_list()[-1])
            mdobj.update_md_paragraph(tmdp.para_msg(), tmdp)
        else:
            mdobj.set_md_paragraph(msg, mdp)

    return mdobj
def parse_md(self, filename):
    """Parse one markdown file into an ``MdFile`` of paragraphs.

    The file is read line by line and every line is classified with
    ``self.mdparser.parse``.  Depending on the paragraph type the
    following lines are absorbed into the same paragraph:

    * common text: up to the next ``'blank'`` line (which is consumed) or
      up to, but not including, the next ``'swclabel'`` line;
    * YAML front matter, code blocks, inline HTML and tables: up to and
      including their closing line.

    A header underline is merged with the preceding common paragraph into
    a single ``'header'`` paragraph.  In a second pass, paragraphs with
    identical text are folded into one entry that records every line
    number; paragraphs consisting of a lone newline are dropped.

    Args:
        filename: path of the markdown file, resolved against the current
            working directory.

    Returns:
        The ``MdFile`` created for ``filename``, with paragraphs keyed by
        their text.
    """
    with open(os.path.join(os.getcwd(), filename), 'r') as mdfile:
        mdobj = MdFile(filename)
        print('Extracting messages from {} ... '.format(filename))

        mdp_list = []
        line = 0

        def absorb_until(para, is_last):
            """Append following lines to *para*, up to and including the
            first line accepted by *is_last* (or EOF); return the number
            of lines consumed."""
            consumed = 0
            while True:
                nxt = mdfile.readline()
                if not nxt:
                    break
                consumed += 1
                para.set_type(para.para_type(), para.para_msg() + nxt)
                if is_last(nxt):
                    break
            return consumed

        # parsing routine per one markdown file
        while True:
            mdstr = mdfile.readline()
            if not mdstr:
                break
            line += 1

            mdptype = self.mdparser.parse(mdstr)
            mdp = MdPara()
            mdp.set_type(mdptype, mdstr)
            cline = line

            # paragraph of text that's broken into multiple lines
            if mdp.is_common():
                while True:
                    # Remember where the next line starts BEFORE reading
                    # it, so it can always be pushed back.  Previously the
                    # position was only recorded after a line had been
                    # appended, leaving it unbound (or stale from an
                    # earlier paragraph) when the very first continuation
                    # line was a label.
                    mdpos = mdfile.tell()
                    mdstr = mdfile.readline()
                    if not mdstr:
                        break
                    nxttype = self.mdparser.parse(mdstr)
                    if nxttype == 'blank':
                        # the terminating blank line is consumed
                        line += 1
                        break
                    if nxttype == 'swclabel':
                        # un-read the label; the outer loop parses it
                        mdfile.seek(mdpos)
                        break
                    line += 1
                    mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

            # if Front matter yaml section starts
            # TODO join with codeblock?
            if mdp.is_yamlblock():
                line += absorb_until(
                    mdp, lambda s: self.mdparser.parse(s) == 'yamlblock')

            # A header underline turns the previous common paragraph into
            # a two-line header.  Guard against an underline that is the
            # very first paragraph (nothing to merge with).
            if mdp.is_headerline() and mdp_list:
                bmdp = mdp_list[-1]
                if bmdp.is_common():
                    cline = bmdp.line_number_list()[-1]
                    del mdp_list[-1]
                    mdp.set_type('header',
                                 bmdp.para_msg() + mdp.para_msg())

            if mdp.is_codeblock():
                # code block: read up to the closing fence
                line += absorb_until(
                    mdp, lambda s: self.mdparser.parse(s) == 'codeblock')
            elif mdp.is_tagopen():
                # inline HTML: read up to the matching closing tag
                opened = re.search(r'<(\S+)>', mdp.para_msg())
                # If the tag name cannot be extracted the paragraph is
                # kept as it is instead of crashing on None.
                if opened is not None:
                    tagcls = '</{}>'.format(opened.group(1))
                    line += absorb_until(
                        mdp,
                        lambda s: (self.mdparser.parse(s) == 'tagclose'
                                   and s.rstrip('\n') == tagcls))
            elif mdp.is_tableopen():
                line += absorb_until(
                    mdp, lambda s: self.mdparser.parse(s) == 'tableclose')

            mdp.add_line_number(cline)
            mdp_list.append(mdp)

    # The file is closed here; fold duplicated paragraphs together.
    for mdp in mdp_list:
        msg = mdp.para_msg()
        # blank line (ignore)
        if msg == '\n':
            continue
        if mdobj.exist_md_paragraph(msg):
            tmdp = mdobj.get_md_paragraph(msg)
            tmdp.add_line_number(mdp.line_number_list()[-1])
            mdobj.update_md_paragraph(tmdp.para_msg(), tmdp)
        else:
            mdobj.set_md_paragraph(msg, mdp)

    return mdobj
class PoCompiler:
    """Compile translated ``po/*.<lang>.po`` catalogs back into markdown.

    ``run()`` reads the language list from ``po/LINGUAS`` and, for every
    catalog of every language, performs three steps on the open file
    ``self.f``: ``verify()`` checks the header, ``analysis()`` collects
    the messages into ``self.pofile`` and ``compile()`` writes one
    markdown file per source file below ``locale/<lang>/``.
    """

    def __init__(self):
        # input / output directories, relative to the working directory
        self.sourcedir = '{}/po'.format(os.getcwd())
        self.outputdir = '{}/locale'.format(os.getcwd())
        self.outfile = None
        # translator credits collected from header comments by verify()
        self.last_trans_name = []
        self.last_trans_email = []
        self.last_trans_year = []
        self.langlist = []
        # locale currently being compiled
        self.clocale = ''
        self.poparser = PoParser()
        # header comment patterns (raw strings: they contain regex escapes)
        self.crpt = r'^# This file is distributed under the same license as ' \
                    r'the [a-zA-Z0-9\-\.\_]+ package\.\n?$'
        self.creditpt = r'^#\s([A-Z][a-z\W]+)([\-\s][A-Z][a-z\W]+)+' \
                        r'\s\<[a-zA-Z0-9\.\-_]+\@' \
                        r'[a-zA-Z0-9\-_]+(\.[a-zA-Z0-9\-_]+)+\>' \
                        r'(,\s([1-2][0-9]{3})(\-([1-2][0-9]{3}))?)+\.\n?$'
        self.langteam = r'^\"Language\-Team: ' \
                        r'([A-Z][a-z]+)\s\<[a-zA-Z0-9\.\-_]+\@' \
                        r'[a-zA-Z0-9\-_]+(\.[a-zA-Z0-9\-_]+)+\>\\n\"\n?$'
        # True once a meta header line has been seen by verify()
        self.cmeta = False
        self.pofile = None
        self.mdfile = None
        self.pomsg = None
        # the catalog file currently being read
        self.f = None

    @staticmethod
    def _unescape(text):
        """Undo the ``\\n``, ``\\"`` and ``\\\\`` escapes of a PO string.

        The text is scanned once from left to right, so an escaped
        backslash followed by the letter ``n`` yields a backslash and an
        ``n`` rather than a newline.  Other escape sequences are left
        untouched.
        """
        table = {'n': '\n', '"': '"', '\\': '\\'}
        return re.sub(r'\\(.)',
                      lambda m: table.get(m.group(1), m.group(0)),
                      text)

    @staticmethod
    def _split_origin(origin):
        """Split a ``'<file>:<line>'`` reference into ``(file, int(line))``.

        Only the last colon separates the line number, so file names that
        contain ``:`` themselves stay intact.
        """
        fname, _, lineno = origin.rpartition(':')
        return fname, int(lineno)

    def verify(self):
        """Check the catalog header read from ``self.f``.

        Reads lines until the first blank line (or EOF).  Collects the
        translator credits found in header comments and validates the
        meta header.

        Raises:
            CopyrightException: the license notice comment is missing.
            MissingCreditException: no credit for the current year.
            LocaleMismatchException: ``Language`` differs from the locale
                being compiled.
            CreditMismatchException: ``Last-Translator`` does not match
                the last credit.
            LangTeamMissingException: ``Language-Team`` is missing or
                still the template placeholder.
        """
        exist_copyright = False
        exist_credit = False

        while True:
            raw = self.f.readline()
            if not raw:
                # EOF: stop instead of parsing empty strings forever
                break
            # drop the newline only; a final line without one stays whole
            poline = raw.rstrip('\n')
            pt = self.poparser.parse(poline)

            # end of verification
            if pt == 'blankline':
                break

            # check header description
            elif pt == 'headerdesc':
                if re.match(self.crpt, poline):
                    exist_copyright = True
                elif re.match(self.creditpt, poline):
                    # NOTE(review): the stored name keeps the leading
                    # '# ' of the comment line.
                    self.last_trans_name.append(poline.split('<')[0].strip())
                    self.last_trans_email.append(
                        poline.split('<')[1].split('>')[0])
                    # NOTE(review): this takes the FIRST year entry after
                    # the e-mail address (the end year if it is a range),
                    # even when several entries are listed - confirm that
                    # the last entry is not what was intended.
                    years = poline[:-1].split('>')[1].split(', ')[1]
                    if '-' in years:
                        years = years.split('-')[1]
                    self.last_trans_year.append(years)
                    thisyear = datetime.now(tzlocal()).strftime('%Y')
                    if self.last_trans_year[-1] == thisyear:
                        exist_credit = True

            # if now is on msgid "", msgstr "" header
            elif pt == 'msgheader':
                if not exist_copyright:
                    raise CopyrightException()
                if not exist_credit:
                    raise MissingCreditException()

            # check po meta header
            elif pt == 'headermeta':
                self.cmeta = True
                # strip the opening quote and the trailing \n" sequence
                keyval = poline[1:-3].split(': ')
                if keyval[0] == 'Language':
                    if keyval[1] != self.clocale:
                        raise LocaleMismatchException(keyval[1],
                                                      self.clocale)
                elif keyval[0] == 'Last-Translator':
                    name = keyval[1].split('<')[0].strip()
                    email = keyval[1].split('<')[1].split('>')[0].strip()
                    # NOTE(review): because the stored credit name starts
                    # with '# ', the name comparison always differs and
                    # this effectively compares the e-mail only.
                    if self.last_trans_name[-1] != name \
                            and self.last_trans_email[-1] != email:
                        raise CreditMismatchException()
                elif keyval[0] == 'Language-Team':
                    if not re.match(self.langteam, poline) or \
                            poline == '\"Language-Team: LANGUAGE <*****@*****.**>\n\"':
                        raise LangTeamMissingException()

    def get_msg_set(self, poline):
        """Read one msgid/msgstr pair, starting at the msgid line *poline*.

        Continuation lines are concatenated.  Reading stops at the blank
        line that ends the entry, or at EOF.

        Returns:
            ``(msgid, msgstr)`` with PO escapes still in place.

        Raises:
            ValueError: the file ends before a msgstr line is found.
        """
        # text after the keyword, without the closing quote and newline
        idparts = [poline[:-2].split(' "', 1)[1]]
        poline = self.f.readline()
        while poline and self.poparser.parse(poline) != 'msgstr':
            idparts.append(poline[1:-2])
            poline = self.f.readline()
        if not poline:
            # previously this condition looped forever at EOF
            raise ValueError(
                'unexpected end of file in {}: msgid without msgstr'.format(
                    getattr(self.f, 'name', '<po file>')))

        strparts = [poline[:-2].split(' "', 1)[1]]
        poline = self.f.readline()
        while poline and self.poparser.parse(poline) != 'blankline':
            strparts.append(poline[1:-2])
            poline = self.f.readline()

        return ''.join(idparts), ''.join(strparts)

    def add_msg_set(self, line, msgid, msgstr):
        """Register a message for the origin *line* (``'<file>:<line>'``).

        The message is stored in the ``MdFile`` of its source file, keyed
        by line number, and that ``MdFile`` is added to (or updated in)
        ``self.pofile``.
        """
        self.pomsg = PoMessage()
        self.pomsg.set_msgid(msgid)
        self.pomsg.set_msgstr(msgstr)

        cmdf, clnn = self._split_origin(line)

        if self.pofile.exist_msg_object(cmdf):
            self.mdfile = self.pofile.msg_object_by_tfile(cmdf)
            self.mdfile.set_md_paragraph(clnn, self.pomsg)
            self.pofile.update_msg_object(cmdf, self.mdfile)
        else:
            if not self.mdfile or self.mdfile.md_filename() != cmdf:
                self.mdfile = MdFile(cmdf)
            self.mdfile.set_md_paragraph(clnn, self.pomsg)
            self.pofile.add_msg_object(cmdf, self.mdfile)

    def analysis(self):
        """Collect all messages of ``self.f`` into a new ``self.pofile``.

        Origin comments are accumulated until the next message, which is
        then registered once per origin.  Fuzzy entries are registered
        with an empty translation.  Plain entries are only registered
        after ``verify()`` has seen a meta header (``self.cmeta``).
        """
        poline = ''
        fnarr = []

        # skip until it reached to blank line
        while self.poparser.parse(poline) != 'blankline':
            poline = self.f.readline()
            if not poline:
                break

        self.pofile = PoFile(self.f.name)
        # Do not let an MdFile left over from a previously compiled
        # catalog leak its paragraphs into this one.
        self.mdfile = None

        # structural analysis
        while True:
            poline = self.f.readline()
            if not poline:
                break

            pt = self.poparser.parse(poline)
            if pt == 'headerdesc' or pt == 'blankline':
                continue
            elif pt == 'stringorgn':
                # strip the comment prefix and the newline
                if poline.count(' ') > 1:
                    fnarr.extend(poline[3:-1].split(' '))
                else:
                    fnarr.append(poline[3:-1])
            elif pt == 'fuzzy':
                # consider that this isn't translated
                poline = self.f.readline()
                msgid, msgstr = self.get_msg_set(poline)
                for fn in fnarr:
                    self.add_msg_set(fn, msgid, '')
                fnarr = []
            elif pt in ('msgid', 'msgheader') and self.cmeta:
                msgid, msgstr = self.get_msg_set(poline)
                for fn in fnarr:
                    self.add_msg_set(fn, msgid, msgstr)
                fnarr = []

    def compile(self):
        """Write every markdown file collected in ``self.pofile``.

        Each file goes to ``<outputdir>/<locale>/<md filename>``; missing
        parent directories are created.  Paragraphs are written in line
        number order and padded with blank lines so that they keep the
        line positions recorded in the catalog.  Untranslated messages
        fall back to their msgid.
        """
        # get list of MdFile()
        mdlist = self.pofile.msg_object()

        for mdfn in sorted(mdlist.keys()):
            mdfo = mdlist.get(mdfn)
            outpath = '{}/{}/{}'.format(self.outputdir, self.clocale, mdfn)
            # Create the whole parent path (including the locale
            # directory), not only the first component of the file name.
            os.makedirs(os.path.dirname(outpath), 0o755, exist_ok=True)

            # writeout to md
            with open(outpath, 'w') as self.outfile:
                print('Writing {} ... '.format(self.outfile.name), end='')

                # cln : line number of the previously written paragraph
                # lim : newlines already written by that paragraph
                cln, lim = 0, 0

                # get list of PoMessage()
                pomarr = mdfo.md_paragraph()
                for poln in sorted(pomarr.keys()):
                    if cln != 0:
                        # fill the gap up to this paragraph's line
                        self.outfile.write('\n' * max(0, poln - cln - lim))

                    pomobj = pomarr.get(poln)
                    source = pomobj.msgstr()
                    if len(source) == 0:
                        source = pomobj.msgid()
                    rendered = self._unescape(source)
                    lim = rendered.count('\n')
                    self.outfile.write(rendered)
                    cln = poln

                self.outfile.write('\n\n')
            print('OK')

    def run(self):
        """Compile every catalog of every language listed in LINGUAS."""
        with open('{}/LINGUAS'.format(self.sourcedir), 'r') as linguas:
            # split() also copes with a missing trailing newline
            self.langlist = linguas.readline().split()

        for lang in self.langlist:
            polist = glob.glob('{}/*.{}.po'.format(self.sourcedir, lang))
            if not polist:
                continue
            print('Compiling *.{}.po ...'.format(lang))
            self.clocale = lang
            for po in polist:
                print('Compiling {} ...'.format(po))
                with open(po, 'r') as self.f:
                    self.verify()
                    self.analysis()
                    self.compile()