Esempio n. 1
0
    def convertunit(self, dtd_unit):
        """Build a po unit out of a simple (non-mixed) dtd unit.

        Localization notes addressed to this unit's own id are dealt with
        first: a note starting with ``DONT_TRANSLATE`` blanks the unit's id
        and source and is removed, any other note is replaced by an
        ``automaticcomment`` entry.  The comments and strings are then
        converted into the po unit.

        Returns None if ``dtd_unit`` is None, or if the resulting po unit is
        blank and has no locations.
        """
        if dtd_unit is None:
            return None
        po_unit = po.pounit(encoding="UTF-8")
        # look through the comments for localization notes
        for index, (kind, text) in enumerate(dtd_unit.comments):
            if kind != 'locnote':
                continue
            # the entity name sits in parentheses after the marker
            marker_end = quote.findend(text, 'LOCALIZATION NOTE')
            paren_open = text.find('(', marker_end)
            if paren_open == -1:
                continue
            paren_close = text.find(')', paren_open + 1)
            noted_entity = text[paren_open + 1:paren_close].strip()
            # the note itself runs from the colon up to the comment terminator
            colon = text.find(':', paren_close + 1)
            comment_end = text.find('-->', paren_close)
            note = text[colon + 1:comment_end].strip()
            # only notes aimed at this unit matter
            if dtd_unit.getid() == noted_entity:
                if note.startswith("DONT_TRANSLATE"):
                    # blank the unit and drop the note, then stop looking
                    dtd_unit.setid("")
                    dtd_unit.source = ""
                    del dtd_unit.comments[index]
                    break
                # keep the note as an automatic comment for convertcomments
                dtd_unit.comments[index] = ("automaticcomment", note)
        # do the standard conversion
        self.convertcomments(dtd_unit, po_unit)
        self.convertstrings(dtd_unit, po_unit)
        unwanted = po_unit.isblank() and not po_unit.getlocations()
        return None if unwanted else po_unit
Esempio n. 2
0
    def convertunit(self, dtd_unit):
        """Convert a simple (non-mixed) dtd unit into a po unit.

        Each ``locnote`` comment is split into the entity it names and the
        note text.  When the named entity is this unit's id, a note that
        starts with ``DONT_TRANSLATE`` empties the unit (id and source) and
        is deleted; otherwise the note is stored back as an
        ``automaticcomment``.  Afterwards comments and strings are converted.

        Returns None for a None input, or when the converted po unit is
        blank and carries no locations; the po unit otherwise.
        """
        if dtd_unit is None:
            return None
        po_unit = po.pounit(encoding="UTF-8")
        for position, (comment_kind, comment_text) in enumerate(dtd_unit.comments):
            # anything that is not a localization note is left untouched
            if comment_kind != 'locnote':
                continue
            after_marker = quote.findend(comment_text, 'LOCALIZATION NOTE')
            # locate the "(entity)" part; skip notes that have none
            id_open = comment_text.find('(', after_marker)
            if id_open == -1:
                continue
            id_close = comment_text.find(')', id_open + 1)
            target = comment_text[id_open + 1:id_close].strip()
            # the text between the colon and the comment terminator
            body_from = comment_text.find(':', id_close + 1) + 1
            body_to = comment_text.find('-->', id_close)
            body = comment_text[body_from:body_to].strip()
            if dtd_unit.getid() == target:
                if not body.startswith("DONT_TRANSLATE"):
                    # hand the note over to convertcomments
                    dtd_unit.comments[position] = ("automaticcomment", body)
                else:
                    # remove the entity, its definition and the note itself
                    dtd_unit.setid("")
                    dtd_unit.source = ""
                    del dtd_unit.comments[position]
                    # nothing more to look for
                    break
        # standard conversion of whatever is left
        self.convertcomments(dtd_unit, po_unit)
        self.convertstrings(dtd_unit, po_unit)
        if not po_unit.isblank() or po_unit.getlocations():
            return po_unit
        return None
Esempio n. 3
0
 def convertunit(self, thedtd):
     """Turn a dtd unit into a po unit.

     Localization notes that name this unit's entity are processed first:
     a ``DONT_TRANSLATE`` note clears the entity and definition and is
     removed, any other note becomes an ``automaticcomment``.  Comments
     and strings are then converted.

     Returns None if the unit is None, if its ``entityparameter`` is
     ``"SYSTEM"``, or if the resulting po unit is blank without locations.
     """
     if thedtd is None or getattr(thedtd, "entityparameter", None) == "SYSTEM":
         return None
     thepo = po.pounit(encoding="UTF-8")
     # scan the comments for localization notes
     for index, (kind, text) in enumerate(thedtd.comments):
         if kind != 'locnote':
             continue
         # the entity name is given in parentheses after the marker
         marker_end = quote.findend(text, 'LOCALIZATION NOTE')
         paren_open = text.find('(', marker_end)
         if paren_open == -1:
             continue
         paren_close = text.find(')', paren_open + 1)
         noted_entity = text[paren_open + 1:paren_close].strip()
         # the note text runs from the colon up to the comment terminator
         colon = text.find(':', paren_close + 1)
         comment_end = text.find('-->', paren_close)
         note = text[colon + 1:comment_end].strip()
         # ignore notes written for some other entity
         if thedtd.entity == noted_entity:
             if note.startswith("DONT_TRANSLATE"):
                 # clear the unit, drop the note and stop scanning
                 thedtd.entity = ""
                 thedtd.definition = ""
                 del thedtd.comments[index]
                 break
             # leave the note for convertcomments as an automatic comment
             thedtd.comments[index] = ("automaticcomment", note)
     # do the standard conversion
     self.convertcomments(thedtd, thepo)
     self.convertstrings(thedtd, thepo)
     unwanted = thepo.isblank() and not thepo.getlocations()
     return None if unwanted else thepo
Esempio n. 4
0
 def convertunit(self, thedtd):
     """Convert a dtd unit to a po unit.

     Every ``locnote`` comment is split into the entity it names and its
     note text.  If the named entity is this unit's entity, a note starting
     with ``DONT_TRANSLATE`` empties the unit (entity and definition) and
     is deleted; any other note is stored back as an ``automaticcomment``.
     Comments and strings are converted afterwards.

     Returns None when the unit is None, when its ``entityparameter`` is
     ``"SYSTEM"``, or when the po unit ends up blank with no locations.
     """
     if thedtd is None:
         return None
     parameter = getattr(thedtd, "entityparameter", None)
     if parameter == "SYSTEM":
         return None
     thepo = po.pounit(encoding="UTF-8")
     for position, (comment_kind, comment_text) in enumerate(thedtd.comments):
         # only localization notes need special handling
         if comment_kind != 'locnote':
             continue
         after_marker = quote.findend(comment_text, 'LOCALIZATION NOTE')
         # find the "(entity)" part; notes without one are skipped
         id_open = comment_text.find('(', after_marker)
         if id_open == -1:
             continue
         id_close = comment_text.find(')', id_open + 1)
         target = comment_text[id_open + 1:id_close].strip()
         # the text between the colon and the comment terminator
         body_from = comment_text.find(':', id_close + 1) + 1
         body_to = comment_text.find('-->', id_close)
         body = comment_text[body_from:body_to].strip()
         if thedtd.entity == target:
             if not body.startswith("DONT_TRANSLATE"):
                 # pass the note on to convertcomments
                 thedtd.comments[position] = ("automaticcomment", body)
             else:
                 # take out the entity, the definition and the note
                 thedtd.entity = ""
                 thedtd.definition = ""
                 del thedtd.comments[position]
                 # nothing more to look for
                 break
     # standard conversion of what remains
     self.convertcomments(thedtd, thepo)
     self.convertstrings(thedtd, thepo)
     if not thepo.isblank() or thepo.getlocations():
         return thepo
     return None
Esempio n. 5
0
    def parse(self, dtdsrc):
        """Read the first dtd element from the source code into this object.

        The comment list, entity name and definition are reset, then
        ``dtdsrc`` is split on newlines and processed line by line until one
        entity definition has been closed or the source runs out.  Comments
        met on the way are appended to ``self.comments`` as
        ``(commenttype, text)`` tuples; lines seen outside a comment that do
        not start an entity are appended to ``self.unparsedlines``.

        ``self.incomment``, ``self.inentity`` and ``self.unparsedlines`` are
        read here without being initialised, so they must already exist on
        the object when this method is called.

        Returns the number of lines processed (0 if ``dtdsrc`` is empty).
        """
        self.comments = []
        # make all the lists the same: every note list below is an alias of
        # self.comments, so all comment kinds end up in that single list in
        # the order they are appended
        self._locfilenotes = self.comments
        self._locgroupstarts = self.comments
        self._locgroupends = self.comments
        self._locnotes = self.comments
        # self._locfilenotes = []
        # self._locgroupstarts = []
        # self._locgroupends = []
        # self._locnotes = []
        # self.comments = []
        self.entity = None
        self.definition = ''
        if not dtdsrc:
            return 0
        lines = dtdsrc.split("\n")
        linesprocessed = 0
        comment = ""
        for line in lines:
            # put back the newline that split() removed
            line += "\n"
            linesprocessed += 1
            # print "line(%d,%d): " % (self.incomment,self.inentity),line[:-1]
            if not self.incomment:
                # a comment opens on this line
                if (line.find('<!--') != -1):
                    self.incomment = True
                    self.continuecomment = False
                    # now work out the type of comment, and save it (remember we're not in the comment yet)
                    (comment, dummy) = quote.extract(line, "<!--", "-->", None,
                                                     0)
                    if comment.find('LOCALIZATION NOTE') != -1:
                        # presumably findend returns the index just past the
                        # marker; the keyword that follows decides the type
                        l = quote.findend(comment, 'LOCALIZATION NOTE')
                        # skip the spaces between the marker and the keyword
                        while (comment[l] == ' '):
                            l += 1
                        if comment.find('FILE', l) == l:
                            self.commenttype = "locfile"
                        elif comment.find('BEGIN', l) == l:
                            self.commenttype = "locgroupstart"
                        elif comment.find('END', l) == l:
                            self.commenttype = "locgroupend"
                        else:
                            self.commenttype = "locnote"
                    else:
                        # plain comment
                        self.commenttype = "comment"
                #FIXME: bloody entity might share a line with something important
                elif not self.inentity and re.search("%.*;", line):
                    # outside an entity, a line with '%' followed later by ';'
                    # (presumably a parameter entity reference such as %name;)
                    # is stored verbatim as a plain comment and skipped
                    self.comments.append(("comment", line))
                    # this assignment has no effect: the loop continues at once
                    line = ""
                    continue

            if self.incomment:
                # some kind of comment
                (comment,
                 self.incomment) = quote.extract(line, "<!--", "-->", None,
                                                 self.continuecomment)
                # print "comment(%d,%d): " % (self.incomment,self.continuecomment),comment
                # the next line continues this comment if it is still open
                self.continuecomment = self.incomment
                # strip the comment out of what will be parsed
                line = line.replace(comment, "", 1)
                # add an end of line if this is the end of the comment
                if not self.incomment:
                    if line.isspace():
                        # only whitespace is left: keep it with the comment
                        comment += line
                        line = ''
                    else:
                        comment += '\n'
                # check if there's actually an entity definition that's commented out
                # TODO: parse these, store as obsolete messages
                # if comment.find('<!ENTITY') != -1:
                #     # remove the entity from the comment
                #     comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
                # depending on the type of comment (worked out at the start), put it in the right place
                # make it record the comment and type as a tuple
                commentpair = (self.commenttype, comment)
                # all of these lists are the same object as self.comments
                if self.commenttype == "locfile":
                    self._locfilenotes.append(commentpair)
                elif self.commenttype == "locgroupstart":
                    self._locgroupstarts.append(commentpair)
                elif self.commenttype == "locgroupend":
                    self._locgroupends.append(commentpair)
                elif self.commenttype == "locnote":
                    self._locnotes.append(commentpair)
                elif self.commenttype == "comment":
                    self.comments.append(commentpair)

            if not self.inentity and not self.incomment:
                # look for the start of an entity declaration
                entitypos = line.find('<!ENTITY')
                if entitypos != -1:
                    self.inentity = True
                    # text in front of the declaration is remembered only when
                    # it starts with '#'
                    beforeentity = line[:entitypos].strip()
                    if beforeentity.startswith("#"):
                        self.hashprefix = beforeentity
                    self.entitypart = "start"
                else:
                    # neither comment nor entity: keep the line as it is
                    self.unparsedlines.append(line)

            if self.inentity:
                # the parts are handled in order start -> name ->
                # (parameter) -> definition; several parts may be handled on
                # one line because each "if" below falls through to the next
                if self.entitypart == "start":
                    # the entity definition
                    e = quote.findend(line, '<!ENTITY')
                    # keep only the text from index e onwards
                    line = line[e:]
                    self.entitypart = "name"
                    self.entitytype = "internal"
                if self.entitypart == "name":
                    s = 0
                    e = 0
                    # measure the whitespace in front of the name
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    self.space_pre_entity = ' ' * (e - s)
                    s = e
                    self.entity = ''
                    # a leading '%' marks the entity as external
                    if (e < len(line) and line[e] == '%'):
                        self.entitytype = "external"
                        self.entityparameter = ""
                        e += 1
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                    # the name is everything up to the next whitespace
                    while (e < len(line) and not line[e].isspace()):
                        self.entity += line[e]
                        e += 1
                    s = e

                    # the name must not end in an end-of-line character
                    assert quote.rstripeol(self.entity) == self.entity
                    # measure the whitespace between name and definition
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    self.space_pre_definition = ' ' * (e - s)
                    if self.entity:
                        if self.entitytype == "external":
                            self.entitypart = "parameter"
                        else:
                            self.entitypart = "definition"
                        # remember the start position and the quote character
                        if e == len(line):
                            # nothing more on this line: the next part starts
                            # on a following line
                            self.entityhelp = None
                            e = 0
                            continue
                        elif self.entitypart == "definition":
                            self.entityhelp = (e, line[e])
                            self.instring = False
                if self.entitypart == "parameter":
                    # collect the alphanumeric word that follows the name of
                    # an external entity
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    paramstart = e
                    while (e < len(line) and line[e].isalnum()):
                        e += 1
                    self.entityparameter += line[paramstart:e]
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    # drop what has been consumed so far
                    line = line[e:]
                    e = 0
                    if not line:
                        continue
                    # a quote character starts the definition
                    if line[0] in ('"', "'"):
                        self.entitypart = "definition"
                        self.entityhelp = (e, line[e])
                        self.instring = False
                if self.entitypart == "definition":
                    if self.entityhelp is None:
                        # the definition starts on this line: find the first
                        # non-whitespace character and use it as the quote
                        e = 0
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                        if e == len(line):
                            # blank line: keep looking on the next one
                            continue
                        self.entityhelp = (e, line[e])
                        self.instring = False
                    # actually the lines below should remember instring, rather than using it as dummy
                    # entityhelp holds (start position, quote character)
                    e = self.entityhelp[0]
                    if (self.entityhelp[1] == "'"):
                        (defpart, self.instring) = quote.extract(
                            line[e:],
                            "'",
                            "'",
                            startinstring=self.instring,
                            allowreentry=False)
                    elif (self.entityhelp[1] == '"'):
                        (defpart, self.instring) = quote.extract(
                            line[e:],
                            '"',
                            '"',
                            startinstring=self.instring,
                            allowreentry=False)
                    else:
                        raise ValueError("Unexpected quote character... %r" %
                                         (self.entityhelp[1]))
                    # for any following lines, start at the beginning of the line. remember the quote character
                    self.entityhelp = (0, self.entityhelp[1])
                    self.definition += defpart
                    if not self.instring:
                        # the string is closed: keep the rest of the line
                        # (without end-of-line characters) and stop, as only
                        # the first element is read
                        self.closing = line[e + len(defpart):].rstrip("\n\r")
                        self.inentity = False
                        break

        # debugging aid, disabled by the constant condition: make it true to
        # append the repr of every attribute of self as a comment
        if 0:
            for attr in dir(self):
                r = repr(getattr(self, attr))
                if len(r) > 60:
                    r = r[:57] + "..."
                self.comments.append(("comment", "self.%s = %s" % (attr, r)))
        return linesprocessed
Esempio n. 6
0
    def parse(self, dtdsrc):
        """Read the first dtd element from the source code into this object.

        The comment list, entity name and definition are reset, then
        ``dtdsrc`` is split on newlines and processed line by line until one
        entity definition has been closed or the source runs out.  Comments
        met on the way are appended to ``self.comments`` as
        ``(commenttype, text)`` tuples; lines seen outside a comment that do
        not start an entity are appended to ``self.unparsedlines``.

        ``self.incomment``, ``self.inentity`` and ``self.unparsedlines`` are
        read here without being initialised, so they must already exist on
        the object when this method is called.

        Returns the number of lines processed (0 if ``dtdsrc`` is empty).
        """
        self.comments = []
        # make all the lists the same: every note list below is an alias of
        # self.comments, so all comment kinds end up in that single list in
        # the order they are appended
        self._locfilenotes = self.comments
        self._locgroupstarts = self.comments
        self._locgroupends = self.comments
        self._locnotes = self.comments
        # self._locfilenotes = []
        # self._locgroupstarts = []
        # self._locgroupends = []
        # self._locnotes = []
        # self.comments = []
        self.entity = None
        self.definition = ''
        if not dtdsrc:
            return 0
        lines = dtdsrc.split("\n")
        linesprocessed = 0
        comment = ""
        for line in lines:
            # put back the newline that split() removed
            line += "\n"
            linesprocessed += 1
            if not self.incomment:
                # a comment opens on this line
                if (line.find('<!--') != -1):
                    self.incomment = True
                    self.continuecomment = False
                    # now work out the type of comment, and save it (remember we're not in the comment yet)
                    (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                    if comment.find('LOCALIZATION NOTE') != -1:
                        # presumably findend returns the index just past the
                        # marker; the keyword that follows decides the type
                        l = quote.findend(comment, 'LOCALIZATION NOTE')
                        # skip the spaces between the marker and the keyword
                        while (comment[l] == ' '):
                            l += 1
                        if comment.find('FILE', l) == l:
                            self.commenttype = "locfile"
                        elif comment.find('BEGIN', l) == l:
                            self.commenttype = "locgroupstart"
                        elif comment.find('END', l) == l:
                            self.commenttype = "locgroupend"
                        else:
                            self.commenttype = "locnote"
                    else:
                        # plain comment
                        self.commenttype = "comment"
                #FIXME: bloody entity might share a line with something important
                elif not self.inentity and re.search("%.*;", line):
                    # outside an entity, a line with '%' followed later by ';'
                    # (presumably a parameter entity reference such as %name;)
                    # is stored verbatim as a plain comment and skipped
                    self.comments.append(("comment", line))
                    # this assignment has no effect: the loop continues at once
                    line = ""
                    continue

            if self.incomment:
                # some kind of comment
                (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
                # the next line continues this comment if it is still open
                self.continuecomment = self.incomment
                # strip the comment out of what will be parsed
                line = line.replace(comment, "", 1)
                # add an end of line if this is the end of the comment
                if not self.incomment:
                    if line.isspace():
                        # only whitespace is left: keep it with the comment
                        comment += line
                        line = ''
                    else:
                        comment += '\n'
                # check if there's actually an entity definition that's commented out
                # TODO: parse these, store as obsolete messages
                # if comment.find('<!ENTITY') != -1:
                #     # remove the entity from the comment
                #     comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
                # depending on the type of comment (worked out at the start), put it in the right place
                # make it record the comment and type as a tuple
                commentpair = (self.commenttype, comment)
                # all of these lists are the same object as self.comments
                if self.commenttype == "locfile":
                    self._locfilenotes.append(commentpair)
                elif self.commenttype == "locgroupstart":
                    self._locgroupstarts.append(commentpair)
                elif self.commenttype == "locgroupend":
                    self._locgroupends.append(commentpair)
                elif self.commenttype == "locnote":
                    self._locnotes.append(commentpair)
                elif self.commenttype == "comment":
                    self.comments.append(commentpair)

            if not self.inentity and not self.incomment:
                # look for the start of an entity declaration
                entitypos = line.find('<!ENTITY')
                if entitypos != -1:
                    self.inentity = True
                    # text in front of the declaration is remembered only when
                    # it starts with '#'
                    beforeentity = line[:entitypos].strip()
                    if beforeentity.startswith("#"):
                        self.hashprefix = beforeentity
                    self.entitypart = "start"
                else:
                    # neither comment nor entity: keep the line as it is
                    self.unparsedlines.append(line)

            if self.inentity:
                # the parts are handled in order start -> name ->
                # (parameter) -> definition; several parts may be handled on
                # one line because each "if" below falls through to the next
                if self.entitypart == "start":
                    # the entity definition
                    e = quote.findend(line, '<!ENTITY')
                    # keep only the text from index e onwards
                    line = line[e:]
                    self.entitypart = "name"
                    self.entitytype = "internal"
                if self.entitypart == "name":
                    s = 0
                    e = 0
                    # measure the whitespace in front of the name
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    self.space_pre_entity = ' ' * (e - s)
                    s = e
                    self.entity = ''
                    # a leading '%' marks the entity as external
                    if (e < len(line) and line[e] == '%'):
                        self.entitytype = "external"
                        self.entityparameter = ""
                        e += 1
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                    # the name is everything up to the next whitespace
                    while (e < len(line) and not line[e].isspace()):
                        self.entity += line[e]
                        e += 1
                    s = e

                    # the name must not end in an end-of-line character
                    assert quote.rstripeol(self.entity) == self.entity
                    # measure the whitespace between name and definition
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    self.space_pre_definition = ' ' * (e - s)
                    if self.entity:
                        if self.entitytype == "external":
                            self.entitypart = "parameter"
                        else:
                            self.entitypart = "definition"
                        # remember the start position and the quote character
                        if e == len(line):
                            # nothing more on this line: the next part starts
                            # on a following line
                            self.entityhelp = None
                            e = 0
                            continue
                        elif self.entitypart == "definition":
                            self.entityhelp = (e, line[e])
                            self.instring = False
                if self.entitypart == "parameter":
                    # collect the alphanumeric word that follows the name of
                    # an external entity
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    paramstart = e
                    while (e < len(line) and line[e].isalnum()):
                        e += 1
                    self.entityparameter += line[paramstart:e]
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    # drop what has been consumed so far
                    line = line[e:]
                    e = 0
                    if not line:
                        continue
                    # a quote character starts the definition
                    if line[0] in ('"', "'"):
                        self.entitypart = "definition"
                        self.entityhelp = (e, line[e])
                        self.instring = False
                if self.entitypart == "definition":
                    if self.entityhelp is None:
                        # the definition starts on this line: find the first
                        # non-whitespace character and use it as the quote
                        e = 0
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                        if e == len(line):
                            # blank line: keep looking on the next one
                            continue
                        self.entityhelp = (e, line[e])
                        self.instring = False
                    # actually the lines below should remember instring, rather than using it as dummy
                    # entityhelp holds (start position, quote character)
                    e = self.entityhelp[0]
                    if (self.entityhelp[1] == "'"):
                        (defpart, self.instring) = quote.extract(line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                    elif (self.entityhelp[1] == '"'):
                        (defpart, self.instring) = quote.extract(line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                    else:
                        raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                    # for any following lines, start at the beginning of the line. remember the quote character
                    self.entityhelp = (0, self.entityhelp[1])
                    self.definition += defpart
                    if not self.instring:
                        # the string is closed: keep the rest of the line
                        # (without end-of-line characters) and stop, as only
                        # the first element is read
                        self.closing = line[e+len(defpart):].rstrip("\n\r")
                        self.inentity = False
                        break

        # debugging aid, disabled by the constant condition: make it true to
        # append the repr of every attribute of self as a comment
        if 0:
            for attr in dir(self):
                r = repr(getattr(self, attr))
                if len(r) > 60:
                    r = r[:57] + "..."
                self.comments.append(("comment", "self.%s = %s" % (attr, r)))
        return linesprocessed