Beispiel #1
0
    def remove_redundancy(self):
        """Merge paragraphs that occur in several files into single entries.

        Every paragraph of every file in ``self.mdf`` is collected under its
        message key. Paragraphs sharing a key are folded into one ``MdPara``
        that records each occurrence as a ``"<filename>:<line>"`` string.
        The merged paragraphs are then regrouped into ``self.mdfd``, a dict
        mapping a file name to an ``MdFile``; each paragraph is attached to
        the file (and line) of its first recorded occurrence.

        Side effects: ``self.mdf`` is deleted, ``self.mdfd`` is rebuilt from
        scratch, and ``self.mdp`` is left pointing at the paragraph dict of
        the last file visited.
        """
        merged = {}

        # MdFile
        for fobj in self.mdf:
            mdfn = fobj.md_filename()
            # stored on the instance, as the attribute is part of the
            # object's visible state after this call
            self.mdp = fobj.md_paragraph()

            # MdPara
            for msg, pobj in self.mdp.items():
                tmdp = merged.get(msg)
                if tmdp is None:
                    # first time this message is seen: start a fresh entry
                    tmdp = MdPara()
                    tmdp.set_type(pobj.para_type(), pobj.para_msg())
                    merged[msg] = tmdp
                for ln in pobj.line_number_list():
                    tmdp.add_line_number('{}:{}'.format(mdfn, ln))

        del self.mdf
        self.mdfd = {}

        for tmdp in merged.values():
            # split on the LAST colon only: the line number is always the
            # final field, while the file name itself may contain colons
            fn, ln = tmdp.line_number_list()[0].rsplit(':', 1)
            fobj = self.mdfd.get(fn)
            if fobj is None:
                fobj = MdFile(fn)
                self.mdfd[fn] = fobj
            fobj.set_md_paragraph(int(ln), tmdp)
Beispiel #2
0
    def remove_redundancy(self):
        """Merge paragraphs that occur in several files into single entries.

        Every paragraph of every file in ``self.mdf`` is collected under its
        message key. Paragraphs sharing a key are folded into one ``MdPara``
        that records each occurrence as a ``"<filename>:<line>"`` string.
        The merged paragraphs are then regrouped into ``self.mdfd``, a dict
        mapping a file name to an ``MdFile``; each paragraph is attached to
        the file (and line) of its first recorded occurrence.

        Side effects: ``self.mdf`` is deleted, ``self.mdfd`` is rebuilt from
        scratch, and ``self.mdp`` is left pointing at the paragraph dict of
        the last file visited.
        """
        merged = {}

        # MdFile
        for fobj in self.mdf:
            mdfn = fobj.md_filename()
            # stored on the instance, as the attribute is part of the
            # object's visible state after this call
            self.mdp = fobj.md_paragraph()

            # MdPara
            for msg, pobj in self.mdp.items():
                tmdp = merged.get(msg)
                if tmdp is None:
                    # first time this message is seen: start a fresh entry
                    tmdp = MdPara()
                    tmdp.set_type(pobj.para_type(), pobj.para_msg())
                    merged[msg] = tmdp
                for ln in pobj.line_number_list():
                    tmdp.add_line_number('{}:{}'.format(mdfn, ln))

        del self.mdf
        self.mdfd = {}

        for tmdp in merged.values():
            # split on the LAST colon only: the line number is always the
            # final field, while the file name itself may contain colons
            fn, ln = tmdp.line_number_list()[0].rsplit(':', 1)
            fobj = self.mdfd.get(fn)
            if fobj is None:
                fobj = MdFile(fn)
                self.mdfd[fn] = fobj
            fobj.set_md_paragraph(int(ln), tmdp)
Beispiel #3
0
    def add_msg_set(self, line, msgid, msgstr):
        """Register one msgid/msgstr pair under the file it originates from.

        Args:
            line: source reference in ``"<filename>:<line number>"`` form.
            msgid: original message text.
            msgstr: translated message text (may be empty).

        A new ``PoMessage`` is stored in ``self.pomsg`` and attached, at the
        referenced line number, to the ``MdFile`` for the referenced file.
        That ``MdFile`` is fetched from ``self.pofile`` when already known
        there; otherwise ``self.mdfile`` is reused if it is for the same file
        name, or a new one is created, and it is added to ``self.pofile``.

        Raises:
            ValueError: if ``line`` has no ``:`` separator or the part after
                the last ``:`` is not an integer.
        """
        self.pomsg = PoMessage()
        self.pomsg.set_msgid(msgid)
        self.pomsg.set_msgstr(msgstr)

        # split on the LAST colon only so file names containing ':' survive
        cmdf, lineno = line.rsplit(':', 1)
        clnn = int(lineno)

        if self.pofile.exist_msg_object(cmdf):
            self.mdfile = self.pofile.msg_object_by_tfile(cmdf)
            self.mdfile.set_md_paragraph(clnn, self.pomsg)
            self.pofile.update_msg_object(cmdf, self.mdfile)
            return

        # unknown to the catalog: keep the cached MdFile only when it is
        # already the one for this file name
        if not self.mdfile or self.mdfile.md_filename() != cmdf:
            self.mdfile = MdFile(cmdf)

        self.mdfile.set_md_paragraph(clnn, self.pomsg)
        self.pofile.add_msg_object(cmdf, self.mdfile)
Beispiel #4
0
    def parse_md(self, filename):
        """Split one markdown file into paragraphs and collect them.

        The file is read line by line; ``self.mdparser.parse`` classifies
        each line. Blank lines are skipped. Code blocks, inline HTML and
        tables are accumulated into a single paragraph up to (and including)
        their closing line. A header line that directly follows a common
        paragraph is merged with it into one ``'header'`` paragraph that
        keeps the line number of the common paragraph.

        Paragraphs with identical text are registered once in the returned
        object; further occurrences only add their line number.

        Args:
            filename: path of the markdown file, resolved against the
                current working directory.

        Returns:
            The ``MdFile`` built for ``filename``.
        """
        # ``with`` releases the handle even when parsing raises midway
        with open(os.path.join(os.getcwd(), filename), 'r') as mdfile:
            mdobj = MdFile(filename)

            print('Extracting messages from {} ... '.format(filename))
            mdp_list = []
            line = 0

            # parsing routine per one markdown file
            while True:
                mdstr = mdfile.readline()
                if not mdstr:
                    break
                line += 1

                mdptype = self.mdparser.parse(mdstr)
                if mdptype == 'blank':
                    continue

                mdp = MdPara()
                mdp.set_type(mdptype, mdstr)
                cline = line

                # a header line right after a common paragraph forms a
                # two-line header; nothing to merge with when this is the
                # first paragraph of the file, hence the emptiness check
                if mdp.is_headerline() and mdp_list:
                    bmdp = mdp_list[-1]

                    # if 2 line header found
                    if bmdp.is_common():
                        cline = bmdp.line_number_list()[-1]
                        del mdp_list[-1]
                        mdp.set_type('header',
                                     bmdp.para_msg() + mdp.para_msg())

                # if paragraph is code block: swallow lines up to the
                # closing fence (or end of file)
                if mdp.is_codeblock():
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break
                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        if self.mdparser.parse(mdstr) == 'codeblock':
                            break

                # if paragraph is inline HTML: swallow lines up to the
                # matching closing tag
                elif mdp.is_tagopen():
                    # NOTE(review): assumes every 'tagopen' line contains a
                    # "<name>" tag without whitespace inside -- otherwise
                    # re.search returns None here; confirm against the parser
                    tagcls = '</{}>'.format(
                        re.search(r'<(\S+)>', mdp.para_msg()).group(1))
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break
                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        # compare without the newline; rstrip (not [:-1])
                        # so a final line lacking '\n' is not truncated
                        if self.mdparser.parse(mdstr) == 'tagclose' \
                                and tagcls == mdstr.rstrip('\n'):
                            break

                # table: swallow lines up to the closing row
                elif mdp.is_tableopen():
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break

                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        if self.mdparser.parse(mdstr) == 'tableclose':
                            break

                mdp.add_line_number(cline)
                mdp_list.append(mdp)

        for mdp in mdp_list:
            # blank line (ignore)
            if mdp.para_msg() == '\n':
                continue

            if mdobj.exist_md_paragraph(mdp.para_msg()):
                # same text seen before: only remember the extra location
                tmdp = mdobj.get_md_paragraph(mdp.para_msg())
                tmdp.add_line_number(mdp.line_number_list()[-1])
                mdobj.update_md_paragraph(tmdp.para_msg(), tmdp)
            else:
                mdobj.set_md_paragraph(mdp.para_msg(), mdp)

        return mdobj
Beispiel #5
0
    def parse_md(self, filename):
        """Split one markdown file into paragraphs and collect them.

        The file is read line by line; ``self.mdparser.parse`` classifies
        each line. Consecutive lines of a common paragraph are joined until
        a blank line, the end of the file or a ``'swclabel'`` line (which is
        pushed back so it starts its own paragraph). YAML blocks, code
        blocks, inline HTML and tables are accumulated into a single
        paragraph up to (and including) their closing line. A header line
        that directly follows a common paragraph is merged with it into one
        ``'header'`` paragraph that keeps the common paragraph's line number.

        Paragraphs consisting of a bare newline are dropped; paragraphs with
        identical text are registered once in the returned object, further
        occurrences only add their line number.

        Args:
            filename: path of the markdown file, resolved against the
                current working directory.

        Returns:
            The ``MdFile`` built for ``filename``.
        """
        # ``with`` releases the handle even when parsing raises midway
        with open(os.path.join(os.getcwd(), filename), 'r') as mdfile:
            mdobj = MdFile(filename)

            print('Extracting messages from {} ... '.format(filename))
            mdp_list = []
            line = 0

            # parsing routine per one markdown file
            while True:
                mdstr = mdfile.readline()
                if not mdstr:
                    break
                line += 1

                mdptype = self.mdparser.parse(mdstr)

                mdp = MdPara()
                mdp.set_type(mdptype, mdstr)
                cline = line

                # paragraph of text that's broken into multiple lines
                if mdp.is_common():
                    # rewind target: the position right after the last line
                    # accepted into this paragraph. It must be set before
                    # the loop, otherwise a 'swclabel' on the very next line
                    # would seek to an unset or stale position.
                    mdpos = mdfile.tell()
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break
                        nexttype = self.mdparser.parse(mdstr)
                        if nexttype == 'blank':
                            break
                        if nexttype == 'swclabel':
                            # un-read the label line so the outer loop
                            # parses it as a paragraph of its own
                            line -= 1
                            mdfile.seek(mdpos)
                            break

                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)
                        mdpos = mdfile.tell()

                # if Front matter yaml section starts
                # TODO join with codeblock?
                if mdp.is_yamlblock():
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break
                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        if self.mdparser.parse(mdstr) == 'yamlblock':
                            break

                # a header line right after a common paragraph forms a
                # two-line header; nothing to merge with when this is the
                # first paragraph of the file, hence the emptiness check
                if mdp.is_headerline() and mdp_list:
                    bmdp = mdp_list[-1]

                    # if 2 line header found
                    if bmdp.is_common():
                        cline = bmdp.line_number_list()[-1]
                        del mdp_list[-1]
                        mdp.set_type('header',
                                     bmdp.para_msg() + mdp.para_msg())

                # if paragraph is code block: swallow lines up to the
                # closing fence (or end of file)
                if mdp.is_codeblock():
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break
                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        if self.mdparser.parse(mdstr) == 'codeblock':
                            break

                # if paragraph is inline HTML: swallow lines up to the
                # matching closing tag
                elif mdp.is_tagopen():
                    # NOTE(review): assumes every 'tagopen' line contains a
                    # "<name>" tag without whitespace inside -- otherwise
                    # re.search returns None here; confirm against the parser
                    tagcls = '</{}>'.format(
                        re.search(r'<(\S+)>', mdp.para_msg()).group(1))
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break
                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        # compare without the newline; rstrip (not [:-1])
                        # so a final line lacking '\n' is not truncated
                        if self.mdparser.parse(mdstr) == 'tagclose' \
                                and tagcls == mdstr.rstrip('\n'):
                            break

                # table: swallow lines up to the closing row
                elif mdp.is_tableopen():
                    while True:
                        mdstr = mdfile.readline()
                        line += 1
                        if not mdstr:
                            break

                        mdp.set_type(mdp.para_type(), mdp.para_msg() + mdstr)

                        if self.mdparser.parse(mdstr) == 'tableclose':
                            break

                mdp.add_line_number(cline)
                mdp_list.append(mdp)

        for mdp in mdp_list:
            # blank line (ignore)
            if mdp.para_msg() == '\n':
                continue

            if mdobj.exist_md_paragraph(mdp.para_msg()):
                # same text seen before: only remember the extra location
                tmdp = mdobj.get_md_paragraph(mdp.para_msg())
                tmdp.add_line_number(mdp.line_number_list()[-1])
                mdobj.update_md_paragraph(tmdp.para_msg(), tmdp)
            else:
                mdobj.set_md_paragraph(mdp.para_msg(), mdp)

        return mdobj
Beispiel #6
0
class PoCompiler:
    """Turn translated ``*.po`` catalogs back into per-locale markdown files.

    ``run`` drives the whole process: for every language listed in
    ``<sourcedir>/LINGUAS`` each ``*.<lang>.po`` file is verified
    (``verify``), parsed into message objects (``analysis``) and written
    out below ``<outputdir>/<lang>/`` (``compile``).
    """

    def __init__(self):
        # directory holding LINGUAS and the *.<lang>.po catalogs
        self.sourcedir = '{}/po'.format(os.getcwd())
        # root directory of the generated files (one sub-directory per locale)
        self.outputdir = '{}/locale'.format(os.getcwd())
        # handle of the markdown file most recently written by compile()
        self.outfile = None

        # translator credits collected from the header comments by verify()
        self.last_trans_name = []
        self.last_trans_email = []
        self.last_trans_year = []

        # languages read from LINGUAS and the one currently being processed
        self.langlist = []
        self.clocale = ''

        self.poparser = PoParser()
        # header line stating the license of the catalog
        self.crpt = r'^# This file is distributed under the same license as ' \
                    r'the [a-zA-Z0-9\-\.\_]+ package\.\n?$'
        # header line crediting a translator: "# Name <email>, year[-year]."
        # Raw strings avoid invalid-escape warnings; the last fragment stays
        # non-raw so the pattern value is unchanged (it holds a real newline).
        self.creditpt = r'^#\s([A-Z][a-z\W]+)([\-\s][A-Z][a-z\W]+)+' \
                        r'\s\<[a-zA-Z0-9\.\-_]+\@' \
                        r'[a-zA-Z0-9\-_]+(\.[a-zA-Z0-9\-_]+)+\>' \
                        r'(,\s([1-2][0-9]{3})(\-([1-2][0-9]{3}))?)+\.' \
                        '\n?$'
        # meta header line naming the language team
        self.langteam = r'^\"Language\-Team: ' \
                        r'([A-Z][a-z]+)\s\<[a-zA-Z0-9\.\-_]+\@' \
                        r'[a-zA-Z0-9\-_]+(\.[a-zA-Z0-9\-_]+)+\>\\n\"\n?$'

        # True once verify() has seen a meta header line
        self.cmeta = False
        self.pofile = None
        self.mdfile = None
        self.pomsg = None

        # handle of the .po file currently being processed
        self.f = None

    def verify(self):
        """Check the header of the open catalog ``self.f``.

        Lines are read until the parser reports a blank line. Along the way
        the copyright line and translator credits are looked for, and the
        ``Language``, ``Last-Translator`` and ``Language-Team`` meta headers
        are validated.

        Raises:
            CopyrightException: no copyright line precedes the msgid header.
            MissingCreditException: no credit line for the current year
                precedes the msgid header.
            LocaleMismatchException: ``Language`` differs from
                ``self.clocale``.
            CreditMismatchException: neither name nor e-mail of
                ``Last-Translator`` matches the last credit line.
            LangTeamMissingException: ``Language-Team`` is malformed or still
                the template placeholder.
        """
        exist_copyright = False
        exist_credit = False

        while True:
            # drop the trailing newline
            poline = self.f.readline()[:-1]
            pt = self.poparser.parse(poline)

            # end of verification
            if pt == 'blankline':
                break

            # check header description
            elif pt == 'headerdesc':
                if re.match(self.crpt, poline):
                    exist_copyright = True
                elif re.match(self.creditpt, poline):
                    self.last_trans_name.append(poline.split('<')[0].strip())
                    self.last_trans_email.append(
                        poline.split('<')[1].split('>')[0])
                    # poline[:-1] drops the final '.'; only the first year
                    # entry after the e-mail address is considered
                    years = poline[:-1].split('>')[1].split(', ')[1]
                    if '-' in years:
                        # for a range keep its end year
                        years = years.split('-')[1]
                    self.last_trans_year.append(years)

                    thisyear = datetime.now(tzlocal()).strftime('%Y')

                    if self.last_trans_year[-1] == thisyear:
                        exist_credit = True

            # if now is on msgid "", msgstr "" header
            elif pt == 'msgheader':
                if not exist_copyright:
                    raise CopyrightException()
                if not exist_credit:
                    raise MissingCreditException()

            # check po meta header
            elif pt == 'headermeta':
                self.cmeta = True
                # strip the opening quote and the three closing characters
                # (presumably the escaped newline plus quote, as in langteam)
                keyval = poline[1:-3].split(': ')
                if keyval[0] == 'Language':
                    if keyval[1] != self.clocale:
                        raise LocaleMismatchException(keyval[1], self.clocale)
                elif keyval[0] == 'Last-Translator':
                    name = keyval[1].split('<')[0].strip()
                    email = keyval[1].split('<')[1].split('>')[0].strip()
                    if self.last_trans_name[-1] != name \
                            and self.last_trans_email[-1] != email:
                        raise CreditMismatchException()
                elif keyval[0] == 'Language-Team':
                    if not re.match(self.langteam, poline) or \
                       poline == '\"Language-Team: LANGUAGE <*****@*****.**>\n\"':
                        raise LangTeamMissingException()

    def get_msg_set(self, poline):
        """Read one msgid/msgstr pair starting at ``poline``.

        Args:
            poline: the line that opens the msgid entry; following lines are
                read from ``self.f``.

        Returns:
            Tuple ``(msgid, msgstr)`` with continuation lines concatenated
            and the surrounding quotes removed.
        """
        msgid = poline[:-2].split(' "')[1]
        poline = self.f.readline()
        # continuation lines of msgid: strip opening quote, closing quote
        # and newline
        while self.poparser.parse(poline) != 'msgstr':
            msgid += poline[1:-2]
            poline = self.f.readline()

        msgstr = poline[:-2].split(' "')[1]
        poline = self.f.readline()
        # continuation lines of msgstr, up to the blank separator line
        while self.poparser.parse(poline) != 'blankline':
            msgstr += poline[1:-2]
            poline = self.f.readline()

        return msgid, msgstr

    def add_msg_set(self, line, msgid, msgstr):
        """Register one msgid/msgstr pair under the file it originates from.

        Args:
            line: source reference in ``"<filename>:<line number>"`` form.
            msgid: original message text.
            msgstr: translated message text (may be empty).

        Raises:
            ValueError: if ``line`` has no ``:`` separator or the part after
                the last ``:`` is not an integer.
        """
        self.pomsg = PoMessage()
        self.pomsg.set_msgid(msgid)
        self.pomsg.set_msgstr(msgstr)

        # split on the LAST colon only so file names containing ':' survive
        cmdf, lineno = line.rsplit(':', 1)
        clnn = int(lineno)

        if self.pofile.exist_msg_object(cmdf):
            self.mdfile = self.pofile.msg_object_by_tfile(cmdf)
            self.mdfile.set_md_paragraph(clnn, self.pomsg)
            self.pofile.update_msg_object(cmdf, self.mdfile)
            return

        # unknown to the catalog: keep the cached MdFile only when it is
        # already the one for this file name
        if not self.mdfile or self.mdfile.md_filename() != cmdf:
            self.mdfile = MdFile(cmdf)

        self.mdfile.set_md_paragraph(clnn, self.pomsg)
        self.pofile.add_msg_object(cmdf, self.mdfile)

    def analysis(self):
        """Parse the body of ``self.f`` into ``self.pofile``.

        Source references (``stringorgn`` lines) are collected until the
        next message; the message is then registered once per collected
        reference. Messages flagged fuzzy are registered with an empty
        translation. Plain messages are only processed when ``self.cmeta``
        is set.
        """
        poline = ''
        fnarr = []

        # skip until it reached to blank line
        while self.poparser.parse(poline) != 'blankline':
            poline = self.f.readline()

        self.pofile = PoFile(self.f.name)

        # structural analysis
        while True:
            poline = self.f.readline()
            if not poline:
                break

            pt = self.poparser.parse(poline)
            if pt in ('headerdesc', 'blankline'):
                continue

            elif pt == 'stringorgn':
                # drop the 3-character prefix and the newline; several
                # references may share one line, separated by spaces
                if poline.count(' ') > 1:
                    fnarr.extend(poline[3:-1].split(' '))
                else:
                    fnarr.append(poline[3:-1])

            elif pt == 'fuzzy':
                # consider that this isn't translated
                poline = self.f.readline()
                msgid, msgstr = self.get_msg_set(poline)

                for fn in fnarr:
                    self.add_msg_set(fn, msgid, '')

                fnarr = []

            elif (pt == 'msgid' or pt == 'msgheader') and self.cmeta:
                msgid, msgstr = self.get_msg_set(poline)

                for fn in fnarr:
                    self.add_msg_set(fn, msgid, msgstr)

                fnarr = []

    def compile(self):
        """Write every file of ``self.pofile`` below the locale directory.

        Each message is written at its recorded line number: the gap to the
        previous message is padded with newlines, minus the newlines the
        previous message already ended with. The translation is used when
        present, the original text otherwise. Missing target directories are
        created, at any nesting depth.
        """
        # get list of MdFile()
        mdlist = self.pofile.msg_object()
        for mdfn in sorted(mdlist.keys()):
            mdfo = mdlist.get(mdfn)
            out_path = '{}/{}/{}'.format(self.outputdir, self.clocale, mdfn)
            # create the full parent chain, not just the first component,
            # so nested sources and a fresh locale directory both work
            os.makedirs(os.path.dirname(out_path), 0o755, exist_ok=True)

            # writeout to md; ``with`` closes the file even on errors
            with open(out_path, 'w') as self.outfile:
                print('Writing {} ... '.format(self.outfile.name), end='')

                # cln : line number of the previously written message
                # lim : newlines contained in the previously written message
                cln = 0
                lim = 0

                # get list of PoMessage()
                pomarr = mdfo.md_paragraph()
                for poln in sorted(pomarr):
                    if cln != 0:
                        # pad with the blank lines missing between the
                        # previous message and this one
                        self.outfile.write('\n' * max(poln - cln - lim, 0))

                    pomobj = pomarr.get(poln)
                    msgstr = pomobj.msgstr()
                    # fall back to the original text when untranslated
                    text = msgstr if len(msgstr) != 0 else pomobj.msgid()

                    lim = text.count('\\n')
                    self.outfile.write(
                        text.replace('\\n', '\n')
                            .replace('\\"', '\"')
                            .replace('\\\\', '\\'))

                    cln = poln

                self.outfile.write('\n\n')
            print('OK')

    def run(self):
        """Compile every catalog of every language listed in LINGUAS.

        The first line of ``<sourcedir>/LINGUAS`` is read as a
        space-separated list of language codes.
        """
        with open('{}/LINGUAS'.format(self.sourcedir), 'r') as linguas:
            cll = linguas.readline()
        # rstrip instead of [:-1]: a file without trailing newline must not
        # lose the last character of its last language code
        self.langlist = cll.rstrip('\n').split(' ')

        for lang in self.langlist:
            polist = glob.glob('{}/*.{}.po'.format(self.sourcedir, lang))
            if len(polist) != 0:
                print('Compiling *.{}.po ...'.format(lang))
            self.clocale = lang
            for po in polist:
                print('Compiling {} ...'.format(po))
                # closed even when verification or parsing raises
                with open(po, 'r') as self.f:
                    self.verify()
                    self.analysis()
                    self.compile()