def convertline(self, line):
    """Convert one line of a properties template into its output line.

    Continuation lines of a multi-line entry are echoed only while
    ``self.inecho`` is set.  Lines starting with ``#`` are passed through.
    Any other line is split into key and delimiter; when the key has a unit
    in ``self.inputstore.locationindex`` the line is rebuilt with that
    unit's text, otherwise the line is echoed and echo mode is switched on.

    Returns the output line as unicode (empty when nothing is emitted).
    """
    returnline = u""
    if self.inmultilinemsgid:
        # Still inside a multi-line entry: it goes on for as long as the
        # stripped line ends with a backslash.
        stripped = quote.rstripeol(line).strip()
        self.inmultilinemsgid = stripped.endswith('\\')
        if self.inecho:
            returnline = line
    elif line.strip().startswith('#'):
        # Comment line: pass through with a normalised line ending.
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        personality = self.personality
        delimiter_char, delimiter_pos = personality.find_delimiter(line)
        if quote.rstripeol(line)[-1:] == '\\':
            # Trailing backslash: the value continues on the next line.
            self.inmultilinemsgid = True
        if delimiter_pos == -1:
            key = personality.key_strip(line)
            delimiter = " %s " % personality.delimiters[0]
        else:
            key = personality.key_strip(line[:delimiter_pos])
            # Keep the whitespace the template had around the delimiter.
            after = line[delimiter_pos + 1:]
            padding = len(after) - len(after.lstrip())
            prespace = line[line.find(' ', len(key)):delimiter_pos]
            postspace = after[:padding]
            delimiter = prespace + delimiter_char + postspace
        index = self.inputstore.locationindex
        if key not in index:
            # Unknown key: echo it, and keep echoing its continuation lines.
            self.inecho = True
            returnline = line + eol
        else:
            unit = index[key]
            if (not unit.istranslated() and bool(unit.source)
                    and self.remove_untranslated):
                returnline = u""
            else:
                use_source = ((unit.isfuzzy() and not self.includefuzzy)
                              or len(unit.target) == 0)
                if use_source:
                    value = unit.source
                else:
                    value = unit.target
                self.inecho = False
                assert isinstance(value, unicode)
                wrapped_key = "%s%s%s" % (personality.key_wrap_char, key,
                                          personality.key_wrap_char)
                wrapped_value = "%s%s%s" % (personality.value_wrap_char,
                                            personality.encode(value),
                                            personality.value_wrap_char)
                returnline = "%(key)s%(del)s%(value)s%(term)s%(eol)s" % {
                    "key": wrapped_key,
                    "del": delimiter,
                    "value": wrapped_value,
                    "term": personality.pair_terminator,
                    "eol": eol,
                }
    assert isinstance(returnline, unicode)
    return returnline
def convertline(self, line):
    """Convert one line of a properties template into its output line.

    Continuation lines of a multi-line entry are echoed only while
    ``self.inecho`` is set.  Lines starting with ``#`` are passed through.
    Any other line is split into key and delimiter.  When the key maps to a
    non-``None`` unit in ``self.inputstore.locationindex`` the line is
    rebuilt from that unit, or dropped when ``self.remove_untranslated`` is
    set and the unit is untranslated, fuzzy or identical to its source.
    All other lines are echoed and echo mode is switched on.

    Returns the output line as unicode (empty when the line is dropped).
    """
    returnline = u""
    # handle multiline msgid if we're in one
    if self.inmultilinemsgid:
        msgid = quote.rstripeol(line).strip()
        # a trailing backslash means there is more to come
        self.inmultilinemsgid = (msgid[-1:] == '\\')
        # if we're echoing...
        if self.inecho:
            returnline = line
    # otherwise, this could be a comment
    elif line.strip()[:1] == '#':
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        delimiter_char, delimiter_pos = self.personality.find_delimiter(line)
        if quote.rstripeol(line)[-1:] == '\\':
            self.inmultilinemsgid = True
        if delimiter_pos == -1:
            key = self.personality.key_strip(line)
            delimiter = " %s " % self.personality.delimiters[0]
        else:
            key = self.personality.key_strip(line[:delimiter_pos])
            # Calculate space around the equal sign
            prespace = line[line.find(' ', len(key)):delimiter_pos]
            postspacestart = len(line[delimiter_pos+1:])
            postspaceend = len(line[delimiter_pos+1:].lstrip())
            postspace = line[delimiter_pos+1:delimiter_pos+(postspacestart-postspaceend)+1]
            delimiter = prespace + delimiter_char + postspace
        index = self.inputstore.locationindex
        unit = index[key] if key in index else None
        if unit is not None:
            if self.remove_untranslated and (unit.source == unit.target or
                                             unit.isfuzzy() or
                                             len(unit.target) == 0):
                # Drop the line entirely.  (The original also picked a
                # value here that was never used.)
                returnline = u""
            else:
                if (unit.isfuzzy() and not self.includefuzzy) or len(unit.target) == 0:
                    value = unit.source
                else:
                    value = unit.target
                self.inecho = False
                assert isinstance(value, unicode)
                returnline = "%(key)s%(del)s%(value)s%(term)s%(eol)s" % {
                    "key": "%s%s%s" % (self.personality.key_wrap_char,
                                       key,
                                       self.personality.key_wrap_char),
                    "del": delimiter,
                    "value": "%s%s%s" % (self.personality.value_wrap_char,
                                         self.personality.encode(value),
                                         self.personality.value_wrap_char),
                    "term": self.personality.pair_terminator,
                    "eol": eol,
                }
        else:
            # No usable unit: echo this line and its continuation lines.
            self.inecho = True
            returnline = line + eol
    assert isinstance(returnline, unicode)
    return returnline
def convertline(self, line):
    """Convert one UTF-8 encoded line of a PHP template into its output line.

    Lines inside a multi-line string are echoed only while ``self.inecho``
    is set.  ``//`` and ``/*`` comment lines, lines without ``=`` and lines
    with a ``#`` before the ``=`` are passed through.  A definition line is
    rebuilt with the value from ``self.inputdict`` when its key (spaces
    removed) is present; otherwise it is echoed and echo mode is switched on.

    Returns the output line encoded as UTF-8.
    """
    line = unicode(line, 'utf-8')
    returnline = ""
    # handle multiline msgid if we're in one
    if self.inmultilinemsgid:
        # see if there's more
        endpos = line.rfind("%s;" % self.quotechar)
        # The string ends at an unescaped quote followed by ';'.  Slicing
        # (instead of indexing endpos - 1) avoids wrapping round to the last
        # character when the terminator is at position 0.
        if endpos >= 0 and line[endpos - 1:endpos] != '\\':
            self.inmultilinemsgid = False
        # if we're echoing...
        if self.inecho:
            returnline = line
    # otherwise, this could be a comment
    elif line.strip()[:2] == '//' or line.strip()[:2] == '/*':
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        equalspos = line.find('=')
        hashpos = line.find("#")
        # if no equals, just repeat it
        if equalspos == -1:
            returnline = quote.rstripeol(line) + eol
        elif 0 <= hashpos < equalspos:
            # Assume that this is a '#' comment line
            returnline = quote.rstripeol(line) + eol
        # otherwise, this is a definition
        else:
            # now deal with the current string...
            key = line[:equalspos].strip()
            lookupkey = key.replace(" ", "")
            # Calculate space around the equal sign
            prespace = line[len(line[:equalspos].rstrip()):equalspos]
            postspacestart = len(line[equalspos + 1:])
            postspaceend = len(line[equalspos + 1:].lstrip())
            postspace = line[equalspos + 1:equalspos + (postspacestart - postspaceend) + 1]
            # The first non-space character after '=' is taken as the quote.
            self.quotechar = line[equalspos + (postspacestart - postspaceend) + 1]
            inlinecomment_pos = line.rfind("%s;" % self.quotechar)
            if inlinecomment_pos > -1:
                inlinecomment = line[inlinecomment_pos + 2:]
            else:
                inlinecomment = ""
            if lookupkey in self.inputdict:
                self.inecho = False
                value = php.phpencode(self.inputdict[lookupkey], self.quotechar)
                if isinstance(value, str):
                    value = value.decode('utf8')
                returnline = (key + prespace + "=" + postspace +
                              self.quotechar + value + self.quotechar +
                              ';' + inlinecomment + eol)
            else:
                self.inecho = True
                returnline = line + eol
            # no string termination means carry string on to next line
            endpos = line.rfind("%s;" % self.quotechar)
            # if there was no '; or the quote is escaped, we have to continue
            if endpos == -1 or line[endpos - 1:endpos] == '\\':
                self.inmultilinemsgid = True
    if isinstance(returnline, unicode):
        returnline = returnline.encode('utf-8')
    return returnline
def convertline(self, line):
    """Convert one line of a properties template into its output line.

    Continuation lines of a multi-line entry are echoed only while
    ``self.inecho`` is set.  ``#`` comment lines and lines without ``=`` are
    passed through.  A ``key = value`` line is rebuilt with the value from
    ``self.inputdict`` when the key is present, keeping the template's
    whitespace around ``=``; otherwise the line is echoed and echo mode is
    switched on.

    Returns the output line; unicode results are encoded as UTF-8.
    """
    returnline = ""
    # handle multiline msgid if we're in one
    if self.inmultilinemsgid:
        msgid = quote.rstripeol(line).strip()
        # a trailing backslash means there is more to come
        self.inmultilinemsgid = (msgid[-1:] == '\\')
        # if we're echoing...
        if self.inecho:
            returnline = line
    # otherwise, this could be a comment
    elif line.strip()[:1] == '#':
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        equalspos = line.find('=')
        # if no equals, just repeat it
        if equalspos == -1:
            returnline = quote.rstripeol(line) + eol
        # otherwise, this is a definition
        else:
            # backslash at end means carry string on to next line
            if quote.rstripeol(line)[-1:] == '\\':
                self.inmultilinemsgid = True
            # now deal with the current string...
            key = line[:equalspos].strip()
            # Whitespace between the end of the key and '='.  Measured on
            # the unstripped line so that indented keys and keys containing
            # a space do not leak key or delimiter text into the padding.
            prespace = line[len(line[:equalspos].rstrip()):equalspos]
            # Whitespace between '=' and the start of the value.
            after = line[equalspos + 1:]
            postspace = after[:len(after) - len(after.lstrip())]
            if key in self.inputdict:
                self.inecho = False
                value = self.inputdict[key]
                if isinstance(value, str):
                    value = value.decode('utf8')
                if self.personality in ("mozilla", "skype"):
                    encoded = quote.mozillapropertiesencode(value)
                else:
                    encoded = quote.javapropertiesencode(value)
                returnline = key + prespace + "=" + postspace + encoded + eol
            else:
                self.inecho = True
                returnline = line + eol
    if isinstance(returnline, unicode):
        returnline = returnline.encode('utf-8')
    return returnline
def convertcomments(self, thedtd, thepo):
    """Copy location and comments from a DTD unit onto a PO unit.

    The entity is added as a location when it is non-empty.  Each
    ``(commenttype, comment)`` pair in ``thedtd.comments`` becomes a
    developer note; ``automaticcomment`` entries are added verbatim and all
    others go through ``quote.stripcomment`` first.  ``locgroupstart`` and
    ``locgroupend`` comments additionally set or clear
    ``self.currentgroup``; an active group is added as a translator note.
    CSS-like entities get an extra developer note.
    """
    entity = quote.rstripeol(thedtd.entity)
    if len(entity) > 0:
        thepo.addlocation(thedtd.entity)
    for commenttype, comment in thedtd.comments:
        # handle groups
        if commenttype == "locgroupstart":
            self.currentgroup = comment.replace('BEGIN', 'GROUP')
        elif commenttype == "locgroupend":
            # The group is over; nothing from the comment text is kept.
            self.currentgroup = None
        # handle automatic comment
        if commenttype == "automaticcomment":
            thepo.addnote(comment, origin="developer")
        # handle normal comments
        else:
            thepo.addnote(quote.stripcomment(comment), origin="developer")
    # handle group stuff
    if self.currentgroup is not None:
        thepo.addnote(quote.stripcomment(self.currentgroup),
                      origin="translator")
    if is_css_entity(entity):
        thepo.addnote("Do not translate this. Only change the numeric values if you need this dialogue box to appear bigger", origin="developer")
def convertcomments(self, thedtd, thepo):
    """Transfer the location and comments of ``thedtd`` to ``thepo``.

    A non-empty entity is recorded as the PO unit's location.  Every
    ``(commenttype, comment)`` pair from ``thedtd.comments`` is added as a
    developer note (verbatim for ``automaticcomment``, passed through
    ``quote.stripcomment`` otherwise).  Group start/end comments update
    ``self.currentgroup``, and a group that is still open afterwards is
    added as a translator note.  Entities recognised by ``is_css_entity``
    receive an additional developer note.
    """
    entity = quote.rstripeol(thedtd.entity)
    if len(entity) > 0:
        thepo.addlocation(thedtd.entity)

    for commenttype, comment in thedtd.comments:
        # Track which localisation group, if any, we are inside.
        if commenttype == "locgroupstart":
            self.currentgroup = comment.replace('BEGIN', 'GROUP')
        elif commenttype == "locgroupend":
            # Leaving the group; the rewritten comment text was never used.
            self.currentgroup = None

        if commenttype == "automaticcomment":
            note = comment
        else:
            note = quote.stripcomment(comment)
        thepo.addnote(note, origin="developer")

    if self.currentgroup is not None:
        thepo.addnote(quote.stripcomment(self.currentgroup),
                      origin="translator")
    if is_css_entity(entity):
        thepo.addnote(
            "Do not translate this. Only change the numeric values if you need this dialogue box to appear bigger",
            origin="developer")
def convertline(self, line):
    """Convert one line of a properties template into its output line.

    Continuation lines of a multi-line entry are echoed only while
    ``self.inecho`` is set.  ``#`` comment lines and lines without ``=`` are
    passed through.  A ``key = value`` line is rebuilt with the value from
    ``self.inputdict`` when the key is present, keeping the template's
    whitespace around ``=``; otherwise the line is echoed and echo mode is
    switched on.

    Returns the output line; unicode results are encoded as UTF-8.
    """
    returnline = ""
    # handle multiline msgid if we're in one
    if self.inmultilinemsgid:
        msgid = quote.rstripeol(line).strip()
        # a trailing backslash means there is more to come
        self.inmultilinemsgid = msgid[-1:] == "\\"
        # if we're echoing...
        if self.inecho:
            returnline = line
    # otherwise, this could be a comment
    elif line.strip()[:1] == "#":
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        equalspos = line.find("=")
        # if no equals, just repeat it
        if equalspos == -1:
            returnline = quote.rstripeol(line) + eol
        # otherwise, this is a definition
        else:
            # backslash at end means carry string on to next line
            if quote.rstripeol(line)[-1:] == "\\":
                self.inmultilinemsgid = True
            # now deal with the current string...
            before = line[:equalspos]
            key = before.strip()
            # Padding between the key and "=".  Taken from the unstripped
            # line so indentation or spaces inside the key cannot shift the
            # slice onto key or delimiter characters.
            prespace = line[len(before.rstrip()) : equalspos]
            # Padding between "=" and the value.
            postspacestart = len(line[equalspos + 1 :])
            postspaceend = len(line[equalspos + 1 :].lstrip())
            postspace = line[equalspos + 1 : equalspos + (postspacestart - postspaceend) + 1]
            if key in self.inputdict:
                self.inecho = False
                value = self.inputdict[key]
                if isinstance(value, str):
                    value = value.decode("utf8")
                if self.personality == "mozilla" or self.personality == "skype":
                    returnline = key + prespace + "=" + postspace + quote.mozillapropertiesencode(value) + eol
                else:
                    returnline = key + prespace + "=" + postspace + quote.javapropertiesencode(value) + eol
            else:
                self.inecho = True
                returnline = line + eol
    if isinstance(returnline, unicode):
        returnline = returnline.encode("utf-8")
    return returnline
def parse(self, propsrc):
    """Read the source of a properties file in and include them as units.

    The encoding is detected via ``self.detect_encoding`` and stored in
    ``self.encoding``.  The text is then processed line by line: comment
    lines are collected on the pending unit (unless listed in
    ``self.personality.drop_comments``), a blank line flushes a non-empty
    pending unit, and every other line is split at the delimiter reported
    by the personality.  Values ending in a line continuation are joined
    with the following lines.
    """
    text, encoding = self.detect_encoding(
        propsrc,
        default_encodings=[self.personality.default_encoding,
                           'utf-8', 'utf-16'])
    self.encoding = encoding
    propsrc = text
    newunit = propunit("", self.personality.name)
    inmultilinevalue = False
    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        stripped = line.strip()
        # handle multiline value if we're in one
        if inmultilinevalue:
            newunit.value += line.lstrip()
            # see if there's more
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # still waiting for more: strip the backslash
                newunit.value = newunit.value[:-1]
            else:
                # we're finished, add it to the list...
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        # otherwise, this could be a comment
        # FIXME handle /* */ in a more reliable way
        # FIXME handle // inline comments
        elif (stripped[:1] in (u'#', u'!') or
              stripped[:2] in (u"/*", u"//") or
              # A line that closes a /* */ comment.  The previous test
              # compared everything *except* the last two characters.
              stripped.endswith(u"*/")):
            # add a comment
            if line not in self.personality.drop_comments:
                newunit.comments.append(line)
        elif not stripped:
            # this is a blank line...
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        else:
            newunit.delimiter, delimiter_pos = self.personality.find_delimiter(line)
            if delimiter_pos == -1:
                # A key with no delimiter and no value.
                newunit.name = self.personality.key_strip(line)
                newunit.value = u""
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(line[:delimiter_pos])
                if is_line_continuation(line[delimiter_pos+1:].lstrip()):
                    inmultilinevalue = True
                    newunit.value = line[delimiter_pos+1:].lstrip()[:-1]
                else:
                    newunit.value = self.personality.value_strip(line[delimiter_pos+1:])
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
    # see if there is a leftover one...
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
def parse(self, propsrc):
    """Read the source of a properties file in and include them as units.

    ``propsrc`` is decoded with ``self.encoding`` when that is set and with
    the personality's default encoding otherwise.  The text is processed
    line by line: comment lines are collected on the pending unit, a blank
    line flushes a non-empty pending unit, and every other line is split at
    the delimiter reported by the personality.  Values ending in a line
    continuation are joined with the following lines.
    """
    newunit = propunit("", self.personality.name)
    inmultilinevalue = False
    if self.encoding is not None:
        propsrc = unicode(propsrc, self.encoding)
    else:
        propsrc = unicode(propsrc, self.personality.default_encoding)
    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        stripped = line.strip()
        # handle multiline value if we're in one
        if inmultilinevalue:
            newunit.value += line.lstrip()
            # see if there's more
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # still waiting for more: strip the backslash
                newunit.value = newunit.value[:-1]
            else:
                # we're finished, add it to the list...
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        # otherwise, this could be a comment
        # FIXME handle /* */ in a more reliable way
        # FIXME handle // inline comments
        elif (stripped[:1] in (u'#', u'!') or
              stripped[:2] in (u"/*", u"//") or
              # A line that closes a /* */ comment.  The previous test
              # compared everything *except* the last two characters.
              stripped.endswith(u"*/")):
            # add a comment
            newunit.comments.append(line)
        elif not stripped:
            # this is a blank line...
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        else:
            newunit.delimiter, delimiter_pos = self.personality.find_delimiter(line)
            if delimiter_pos == -1:
                # A key with no delimiter and no value.
                newunit.name = self.personality.key_strip(line)
                newunit.value = u""
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(line[:delimiter_pos])
                if is_line_continuation(line[delimiter_pos + 1:].lstrip()):
                    inmultilinevalue = True
                    newunit.value = line[delimiter_pos + 1:].lstrip()[:-1]
                else:
                    newunit.value = self.personality.value_strip(
                        line[delimiter_pos + 1:])
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
    # see if there is a leftover one...
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit.

    The first three characters of every source comment (the '#: ' marker)
    are skipped and the remainder is split on whitespace.

    rtype: List
    return: A list of the locations with '#: ' stripped
    """
    return [
        location
        for sourcecomment in self.sourcecomments
        for location in quote.rstripeol(sourcecomment)[3:].split()
    ]
def _getmsgpartstr(self, partname, partlines, partcomments=""):
    """Render one message part (keyword plus quoted lines) as PO text.

    ``partlines`` is either a list of already quoted lines or a dict
    mapping plural indexes to such lists; for a dict, every form is
    rendered as ``partname[index]`` in ascending index order.  Any
    ``partcomments`` are written before the remaining lines, after a blank
    leader line.  Two consecutive ``""`` lines in the tail are written once.
    """
    if isinstance(partlines, dict):
        rendered = []
        for index in sorted(partlines.keys()):
            rendered.append(self._getmsgpartstr("%s[%d]" % (partname, index),
                                                partlines[index],
                                                partcomments))
        return "".join(rendered)

    result = partname + " "
    tail_start = 0
    has_lines = len(partlines) > 0
    has_comments = len(partcomments) > 0
    if has_lines and not has_comments:
        result += partlines[0]
        tail_start = 1
    elif has_comments:
        if has_lines and len(unquotefrompo(partlines[:1])) == 0:
            # if there is a blank leader line, it must come before the comment
            result += partlines[0] + '\n'
            # but if the whole string is blank, leave it in
            if len(partlines) > 1:
                tail_start += 1
        else:
            # All partcomments should start on a newline
            result += '""\n'
        # combine comments into one if more than one
        if len(partcomments) > 1:
            merged = []
            for comment in partcomments:
                comment = unquotefrompo([comment])
                if comment.startswith("_:"):
                    comment = comment[len("_:"):]
                if comment.endswith("\\n"):
                    comment = comment[:-len("\\n")]
                # Before we used to strip. Necessary in some cases?
                merged.append(comment)
            partcomments = self.quote("_:%s" % "".join(merged))
        # Strip heading empty line for multiline string, it was already
        # added above
        if partcomments[0] == '""':
            partcomments = partcomments[1:]
        # comments first, no blank leader line needed
        result += "\n".join(partcomments)
        result = quote.rstripeol(result)
    else:
        result += '""'
    result += '\n'

    # add the rest, avoiding duplicate empty lines
    previous = None
    for partline in partlines[tail_start:]:
        if not (previous == '""' and partline == '""'):
            previous = partline
            result += partline + '\n'
    return result
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit.

    The first three characters of every source comment (the '#: ' marker)
    are skipped, the remainder is split on whitespace and each piece is
    passed through ``pocommon.unquote_plus``.

    rtype: List
    return: A list of the locations with '#: ' stripped
    """
    raw_locations = []
    for sourcecomment in self.sourcecomments:
        raw_locations.extend(quote.rstripeol(sourcecomment)[3:].split())
    return [pocommon.unquote_plus(location) for location in raw_locations]
def parse(self, propsrc, personality="java"):
    """Read the source of a properties file in and include them as units.

    ``propsrc`` is decoded as UTF-8 for the ``"mozilla"`` personality and
    as latin1 for every other one.  Comment lines (``#`` or ``!``) are
    collected on the pending unit, blank lines flush a non-empty pending
    unit, lines without a delimiter are skipped, and values ending in a
    line continuation are joined with the following lines.
    """
    newunit = propunit("", personality)
    inmultilinevalue = False
    source_encoding = 'utf-8' if personality == "mozilla" else 'latin1'
    propsrc = unicode(propsrc, source_encoding)
    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        stripped = line.strip()
        if inmultilinevalue:
            # Continuation of the pending unit's value.
            newunit.value += line.lstrip()
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # More to come: strip the backslash.
                newunit.value = newunit.value[:-1]
            else:
                # Finished: store the unit and start a new one.
                self.addunit(newunit)
                newunit = propunit("", personality)
        elif stripped[:1] in (u'#', u'!'):
            newunit.comments.append(line)
        elif not stripped:
            # Blank line: flush whatever has been collected so far.
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", personality)
        else:
            delimeter_char, delimeter_pos = find_delimeter(line)
            if delimeter_pos == -1:
                continue
            # Otherwise, this is a definition.
            newunit.delimeter = delimeter_char
            newunit.name = key_strip(line[:delimeter_pos])
            newunit.value = line[delimeter_pos + 1:].lstrip()
            if is_line_continuation(newunit.value):
                # Backslash at end means carry string on to next line.
                inmultilinevalue = True
                newunit.value = newunit.value[:-1]
            else:
                self.addunit(newunit)
                newunit = propunit("", personality)
    # See if there is a leftover one...
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
def parse(self, propsrc, personality="java"):
    """Read the source of a properties file in and include them as units.

    ``propsrc`` is decoded with ``default_encoding[personality]``.  Comment
    lines (``#`` or ``!``) are collected on the pending unit, blank lines
    flush a non-empty pending unit, lines without a delimiter are skipped,
    and values ending in a line continuation are joined with the following
    lines.
    """
    newunit = propunit("", personality)
    inmultilinevalue = False
    propsrc = unicode(propsrc, default_encoding[personality])
    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        stripped = line.strip()
        if inmultilinevalue:
            # Continuation of the pending unit's value.
            newunit.value += line.lstrip()
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # More to come: strip the backslash.
                newunit.value = newunit.value[:-1]
            else:
                # Finished: store the unit and start a new one.
                self.addunit(newunit)
                newunit = propunit("", personality)
        elif stripped[:1] in (u'#', u'!'):
            newunit.comments.append(line)
        elif not stripped:
            # Blank line: flush whatever has been collected so far.
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", personality)
        else:
            delimiter_char, delimiter_pos = find_delimiter(line)
            if delimiter_pos == -1:
                continue
            # Otherwise, this is a definition.
            newunit.delimiter = delimiter_char
            newunit.name = key_strip(line[:delimiter_pos])
            newunit.value = line[delimiter_pos + 1:].lstrip()
            if is_line_continuation(newunit.value):
                # Backslash at end means carry string on to next line.
                inmultilinevalue = True
                newunit.value = newunit.value[:-1]
            else:
                self.addunit(newunit)
                newunit = propunit("", personality)
    # See if there is a leftover one...
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
def parse(self, input):
    """Parse lines and add them to the file.

    ``input`` is either the source text itself or an object with a
    ``read`` method, which is read and then closed.  When ``self.filename``
    is not set it is taken from ``input.name`` (empty string if absent).
    Each non-empty line is split on tabs, wrapped in an ``ooline`` and
    passed to ``self.addline``.
    """
    if not self.filename:
        self.filename = getattr(input, 'name', '')
    if hasattr(input, "read"):
        # Close the handle even when reading fails.
        try:
            src = input.read()
        finally:
            input.close()
    else:
        src = input
    for line in src.split("\n"):
        line = quote.rstripeol(line)
        if not line:
            continue
        parts = line.split("\t")
        self.addline(ooline(parts))
def _getmsgpartstr(self, partname, partlines, partcomments=""):
    """Render one message part (keyword plus quoted lines) as PO text.

    ``partlines`` is either a list of already quoted lines or a dict
    mapping plural indexes to such lists; for a dict, every form is
    rendered as ``partname[index]`` in ascending index order.  Any
    ``partcomments`` are written before the remaining lines, after a blank
    leader line.
    """
    if isinstance(partlines, dict):
        # sorted() works whether keys() returns a list or a view, unlike
        # calling .sort() on the result of keys().
        partkeys = sorted(partlines)
        return "".join(
            [
                self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments)
                for partkey in partkeys
            ]
        )
    partstr = partname + " "
    partstartline = 0
    if len(partlines) > 0 and len(partcomments) == 0:
        partstr += partlines[0]
        partstartline = 1
    elif len(partcomments) > 0:
        if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
            # if there is a blank leader line, it must come before the comment
            partstr += partlines[0] + "\n"
            # but if the whole string is blank, leave it in
            if len(partlines) > 1:
                partstartline += 1
        else:
            # All partcomments should start on a newline
            partstr += '""\n'
        # combine comments into one if more than one
        if len(partcomments) > 1:
            combinedcomment = []
            for comment in partcomments:
                comment = unquotefrompo([comment])
                if comment.startswith("_:"):
                    comment = comment[len("_:") :]
                if comment.endswith("\\n"):
                    comment = comment[: -len("\\n")]
                # Before we used to strip. Necessary in some cases?
                combinedcomment.append(comment)
            partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
        # comments first, no blank leader line needed
        partstr += "\n".join(partcomments)
        partstr = quote.rstripeol(partstr)
    else:
        partstr += '""'
    partstr += "\n"
    # add the rest
    for partline in partlines[partstartline:]:
        partstr += partline + "\n"
    return partstr
def query(self, tmcontroller, unit):
    """Ask libtranslate for a translation of ``unit.source``.

    On success a one-element match list is emitted with the
    ``'match-found'`` signal.  When the library returns ``None`` a warning
    is logged and nothing is emitted.
    """
    query_str = unit.source
    err = c_int()
    result = self.lt.translate_session_translate_text(
        self.session, query_str,
        self.source_lang, self.target_lang,
        None, None, err)
    if result is None:
        # TODO handle errors and cleanup errors
        logging.warning(
            "An error occured while getting a translation: %s" % err)
        return

    if not isinstance(result, unicode):
        # XXX: The encoding is just a guess
        result = unicode(result, 'utf-8')
    match = {
        'source': query_str,
        'target': quote.rstripeol(result),
        #l10n: Try to keep this as short as possible. Feel free to transliterate in CJK languages for vertical display optimization.
        'tmsource': _('libtranslate')
    }
    # TODO: drop any memory used by 'result'
    self.emit('match-found', query_str, [match])
def query(self, tmcontroller, unit):
    """Look up ``unit.source`` through the libtranslate session.

    A successful lookup is emitted via the ``'match-found'`` signal as a
    list holding a single match dictionary.  If the library call returns
    ``None`` a warning is logged and the method returns without emitting.
    """
    source_text = unit.source
    err = c_int()
    translated = self.lt.translate_session_translate_text(
        self.session, source_text, self.source_lang, self.target_lang,
        None, None, err
    )
    if translated is None:
        # TODO handle errors and cleanup errors
        logging.warning("An error occured while getting a translation: %s" % err)
        return
    if not isinstance(translated, unicode):
        translated = unicode(translated, 'utf-8')  # XXX: The encoding is just a guess
    matches = []
    matches.append({
        'source': source_text,
        'target': quote.rstripeol(translated),
        #l10n: Try to keep this as short as possible. Feel free to transliterate in CJK languages for vertical display optimization.
        'tmsource': _('libtranslate')
    })
    # TODO: drop any memory used by 'translated'
    self.emit('match-found', source_text, matches)
def parse(self, propsrc):
    """Read the source of a properties file in and include them as units.

    The encoding is detected via ``self.detect_encoding`` and stored in
    ``self.encoding``.  The text is then processed line by line: comments
    (including multi-line ones) are collected on the pending unit, blank
    lines flush a named unit or the first comment-only unit, and every
    other line is split at the delimiter reported by the personality.
    Values ending in a line continuation are joined with the following
    lines.  ``self.fold()`` is called once all lines are consumed.

    Raises:
        IOError: if no text could be decoded from a non-empty ``propsrc``.
    """
    text, encoding = self.detect_encoding(
        propsrc,
        default_encodings=[
            self.personality.default_encoding, 'utf-8', 'utf-16'
        ])
    if not text and propsrc:
        raise IOError("Cannot detect encoding for %s." %
                      (self.filename or "given string"))
    self.encoding = encoding
    propsrc = text

    newunit = self.UnitClass("", self.personality.name)
    inmultilinevalue = False
    inmultilinecomment = False
    was_header = False

    for line in propsrc.split("\n"):
        line = quote.rstripeol(line)
        # handle multiline value if we're in one
        if inmultilinevalue:
            newunit.value += line.lstrip()
            # see if there's more
            inmultilinevalue = self.personality.is_line_continuation(
                newunit.value)
            if inmultilinevalue:
                # still waiting for more...
                newunit.value = self.personality.strip_line_continuation(
                    newunit.value)
            else:
                # we're finished, add it to the list...
                newunit.value = self.personality.value_strip(newunit.value)
                self.addunit(newunit)
                newunit = self.UnitClass("", self.personality.name)
        # otherwise, this could be a comment
        # FIXME handle // inline comments
        elif ((inmultilinecomment or is_comment_one_line(line) or
               is_comment_start(line) or is_comment_end(line)) and
              not self.UnitClass.represents_missing(line)):
            # add a comment
            if line not in self.personality.drop_comments:
                newunit.comments.append(line)
            if is_comment_start(line):
                inmultilinecomment = True
            elif is_comment_end(line):
                inmultilinecomment = False
        elif not line.strip():
            # this is a blank line...
            # avoid adding comment only units
            if newunit.name:
                self.addunit(newunit)
                newunit = self.UnitClass("", self.personality.name)
            elif not was_header and str(newunit).strip():
                # Only the first comment-only block is stored on its own.
                self.addunit(newunit)
                newunit = self.UnitClass("", self.personality.name)
                was_header = True
            else:
                newunit.comments.append("")
        else:
            ismissing = False
            if self.UnitClass.represents_missing(line):
                line = self.UnitClass.strip_missing_part(line)
                ismissing = True
            newunit.delimiter, delimiter_pos = self.personality.find_delimiter(
                line)
            if delimiter_pos == -1:
                # A key with no delimiter and no value.
                newunit.name = self.personality.key_strip(line)
                newunit.value = ""
                newunit.delimiter = ""
                newunit.missing = ismissing
                self.addunit(newunit)
                newunit = self.UnitClass("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(
                    line[:delimiter_pos])
                newunit.missing = ismissing
                rawvalue = line[delimiter_pos + 1:]
                if self.personality.is_line_continuation(rawvalue.lstrip()):
                    inmultilinevalue = True
                    # The earlier "[:-1]" assignment was overwritten on the
                    # very next statement; only this result was ever kept.
                    newunit.value = self.personality.strip_line_continuation(
                        rawvalue.lstrip())
                else:
                    newunit.value = self.personality.value_strip(rawvalue)
                    self.addunit(newunit)
                    newunit = self.UnitClass("", self.personality.name)

    # see if there is a leftover one; a lone empty comment does not count
    comments = newunit.comments
    only_blank_comment = len(comments) == 1 and not comments[0]
    if inmultilinevalue or (len(comments) > 0 and not only_blank_comment):
        self.addunit(newunit)
    self.fold()
def convertline(self, line):
    """Convert one UTF-8 encoded line of a PHP template into its output line.

    Lines inside a multi-line string are echoed only while ``self.inecho``
    is set.  Comment lines and lines that open or close an ``array(``
    block are passed through; the latter also switch the key prefix and the
    assignment/terminator delimiters.  A definition line is rebuilt from
    the unit found in ``self.inputstore.locationindex``; unknown keys are
    echoed and switch echo mode on.

    Returns the output line encoded as UTF-8.
    """
    line = six.text_type(line, 'utf-8')
    returnline = ""
    if self.inmultilinemsgid:
        # Look for the closing quote plus terminator of the open string.
        endpos = line.rfind("%s%s" % (self.quotechar, self.enddel))
        # An unescaped terminator ends the multi-line string.
        if endpos >= 0 and line[endpos - 1] != '\\':
            self.inmultilinemsgid = False
        if self.inecho:
            returnline = line
    elif line.strip()[:2] in ('//', '/*'):
        # Comment line.
        returnline = quote.rstripeol(line) + eol
    elif 'array(' in line.lower().replace(" ", ""):
        # Start of an array: entries use "=>" and end with ",".
        self.inarray = True
        self.prename = line[:line.find('=')].strip() + "->"
        self.equaldel = "=>"
        self.enddel = ","
        returnline = quote.rstripeol(line) + eol
    elif self.inarray and ');' in line:
        # End of the array: back to plain assignments.
        self.inarray = False
        self.equaldel = "="
        self.enddel = ";"
        self.prename = ""
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        equalspos = line.find(self.equaldel)
        hashpos = line.find("#")
        if equalspos == -1 or 0 <= hashpos < equalspos:
            # No assignment, or a '#' comment before it: just repeat it.
            returnline = quote.rstripeol(line) + eol
        else:
            # This is a definition.
            key = line[:equalspos].rstrip()
            lookupkey = self.prename + key.lstrip()
            # Whitespace around the assignment delimiter.
            value_start = equalspos + len(self.equaldel)
            after = line[value_start:]
            padding = len(after) - len(after.lstrip())
            prespace = line[len(key):equalspos]
            postspace = after[:padding]
            self.quotechar = line[equalspos + padding + len(self.equaldel)]
            terminator = "%s%s" % (self.quotechar, self.enddel)
            inlinecomment_pos = line.rfind(terminator)
            if inlinecomment_pos > -1:
                inlinecomment = line[inlinecomment_pos + 2:]
            else:
                inlinecomment = ""
            index = self.inputstore.locationindex
            if lookupkey not in index:
                self.inecho = True
                returnline = line + eol
            else:
                unit = index[lookupkey]
                if ((unit.isfuzzy() and not self.includefuzzy) or
                        len(unit.target) == 0):
                    value = unit.source
                else:
                    value = unit.target
                value = php.phpencode(value, self.quotechar)
                self.inecho = False
                if isinstance(value, bytes):
                    value = value.decode('utf8')
                returnline = ("%(key)s%(pre)s%(del)s%(post)s%(quote)s"
                              "%(value)s%(quote)s%(enddel)s%(comment)s"
                              "%(eol)s" % {
                                  "key": key,
                                  "pre": prespace,
                                  "del": self.equaldel,
                                  "post": postspace,
                                  "quote": self.quotechar,
                                  "value": value,
                                  "enddel": self.enddel,
                                  "comment": inlinecomment,
                                  "eol": eol,
                              })
            # No string termination means carry string on to next line:
            # either there was no terminator or its quote is escaped.
            endpos = line.rfind("%s%s" % (self.quotechar, self.enddel))
            if endpos == -1 or line[endpos - 1] == '\\':
                self.inmultilinemsgid = True
    if isinstance(returnline, six.text_type):
        returnline = returnline.encode('utf-8')
    return returnline
def parse(self, dtdsrc):
    """Read the first dtd element from the source code into this object.

    Resets ``self.comments``, ``self.entity`` and ``self.definition`` and
    then consumes ``dtdsrc`` line by line until the definition of one
    ``<!ENTITY`` has been read completely, or the source runs out.
    Comments seen on the way are stored as ``(commenttype, text)`` tuples.
    Lines that are neither part of a comment nor of an entity are appended
    to ``self.unparsedlines``.

    The parser state (``self.incomment``, ``self.inentity``,
    ``self.continuecomment``, ``self.entitypart`` ...) is read here before
    it is assigned, so it is expected to be initialised elsewhere - not
    visible in this block.

    NOTE(review): the nesting below was restored from a flattened copy of
    the source; confirm block boundaries against the canonical file.

    Returns the number of lines processed (0 for empty ``dtdsrc``).
    Raises ValueError if the definition does not start with a quote
    character.
    """
    self.comments = []
    # make all the lists the same
    # (every _loc* attribute is an alias of self.comments, so all comment
    # kinds end up in one list in source order)
    self._locfilenotes = self.comments
    self._locgroupstarts = self.comments
    self._locgroupends = self.comments
    self._locnotes = self.comments
    # self._locfilenotes = []
    # self._locgroupstarts = []
    # self._locgroupends = []
    # self._locnotes = []
    # self.comments = []
    self.entity = None
    self.definition = ''
    if not dtdsrc:
        return 0
    lines = dtdsrc.split("\n")
    linesprocessed = 0
    comment = ""
    for line in lines:
        line += "\n"
        linesprocessed += 1
        # print "line(%d,%d): " % (self.incomment,self.inentity),line[:-1]
        if not self.incomment:
            if (line.find('<!--') != -1):
                self.incomment = True
                self.continuecomment = False
                # now work out the type of comment, and save it (remember we're not in the comment yet)
                (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                if comment.find('LOCALIZATION NOTE') != -1:
                    l = quote.findend(comment, 'LOCALIZATION NOTE')
                    # skip the spaces that follow the marker
                    while (comment[l] == ' '):
                        l += 1
                    # the keyword right after the marker selects the type
                    if comment.find('FILE', l) == l:
                        self.commenttype = "locfile"
                    elif comment.find('BEGIN', l) == l:
                        self.commenttype = "locgroupstart"
                    elif comment.find('END', l) == l:
                        self.commenttype = "locgroupend"
                    else:
                        self.commenttype = "locnote"
                else:
                    # plain comment
                    self.commenttype = "comment"
            #FIXME: bloody entity might share a line with something important
            elif not self.inentity and re.search("%.*;", line):
                # a line matching "%...;" outside an entity is stored
                # whole as a plain comment and not parsed any further
                self.comments.append(("comment", line))
                line = ""
                continue
        if self.incomment:
            # some kind of comment
            (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
            # print "comment(%d,%d): " % (self.incomment,self.continuecomment),comment
            self.continuecomment = self.incomment
            # strip the comment out of what will be parsed
            line = line.replace(comment, "", 1)
            # add an end of line if this is the end of the comment
            if not self.incomment:
                if line.isspace():
                    comment += line
                    line = ''
                else:
                    comment += '\n'
            # check if there's actually an entity definition that's commented out
            # TODO: parse these, store as obsolete messages
            # if comment.find('<!ENTITY') != -1:
            #     # remove the entity from the comment
            #     comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
            # depending on the type of comment (worked out at the start), put it in the right place
            # make it record the comment and type as a tuple
            commentpair = (self.commenttype, comment)
            if self.commenttype == "locfile":
                self._locfilenotes.append(commentpair)
            elif self.commenttype == "locgroupstart":
                self._locgroupstarts.append(commentpair)
            elif self.commenttype == "locgroupend":
                self._locgroupends.append(commentpair)
            elif self.commenttype == "locnote":
                self._locnotes.append(commentpair)
            elif self.commenttype == "comment":
                self.comments.append(commentpair)
        if not self.inentity and not self.incomment:
            entitypos = line.find('<!ENTITY')
            if entitypos != -1:
                self.inentity = True
                # text before the entity is kept only if it starts with '#'
                beforeentity = line[:entitypos].strip()
                if beforeentity.startswith("#"):
                    self.hashprefix = beforeentity
                self.entitypart = "start"
            else:
                self.unparsedlines.append(line)
        if self.inentity:
            if self.entitypart == "start":
                # the entity definition
                e = quote.findend(line, '<!ENTITY')
                line = line[e:]
                self.entitypart = "name"
                self.entitytype = "internal"
            if self.entitypart == "name":
                s = 0
                e = 0
                # whitespace between '<!ENTITY' and the name
                while (e < len(line) and line[e].isspace()):
                    e += 1
                self.space_pre_entity = ' ' * (e - s)
                s = e
                self.entity = ''
                # a leading '%' marks the entity as external
                if (e < len(line) and line[e] == '%'):
                    self.entitytype = "external"
                    self.entityparameter = ""
                    e += 1
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                # the name runs up to the next whitespace
                while (e < len(line) and not line[e].isspace()):
                    self.entity += line[e]
                    e += 1
                s = e
                assert quote.rstripeol(self.entity) == self.entity
                # whitespace between the name and what follows it
                while (e < len(line) and line[e].isspace()):
                    e += 1
                self.space_pre_definition = ' ' * (e - s)
                if self.entity:
                    if self.entitytype == "external":
                        self.entitypart = "parameter"
                    else:
                        self.entitypart = "definition"
                    # remember the start position and the quote character
                    if e == len(line):
                        # nothing more on this line; carry on with the next
                        self.entityhelp = None
                        e = 0
                        continue
                    elif self.entitypart == "definition":
                        self.entityhelp = (e, line[e])
                        self.instring = False
            if self.entitypart == "parameter":
                while (e < len(line) and line[e].isspace()):
                    e += 1
                paramstart = e
                # the parameter is a run of alphanumeric characters
                while (e < len(line) and line[e].isalnum()):
                    e += 1
                self.entityparameter += line[paramstart:e]
                while (e < len(line) and line[e].isspace()):
                    e += 1
                line = line[e:]
                e = 0
                if not line:
                    continue
                # a quote character starts the definition
                if line[0] in ('"', "'"):
                    self.entitypart = "definition"
                    self.entityhelp = (e, line[e])
                    self.instring = False
            if self.entitypart == "definition":
                if self.entityhelp is None:
                    # definition starts on a later line: find its first
                    # non-space character
                    e = 0
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    if e == len(line):
                        continue
                    self.entityhelp = (e, line[e])
                    self.instring = False
                # actually the lines below should remember instring, rather than using it as dummy
                e = self.entityhelp[0]
                if (self.entityhelp[1] == "'"):
                    (defpart, self.instring) = quote.extract(
                        line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                elif (self.entityhelp[1] == '"'):
                    (defpart, self.instring) = quote.extract(
                        line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                else:
                    raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                # for any following lines, start at the beginning of the line. remember the quote character
                self.entityhelp = (0, self.entityhelp[1])
                self.definition += defpart
                if not self.instring:
                    # the string is closed: keep what follows it on this
                    # line and stop after this one entity
                    self.closing = line[e + len(defpart):].rstrip("\n\r")
                    self.inentity = False
                    break
    # uncomment this line to debug processing
    if 0:
        for attr in dir(self):
            r = repr(getattr(self, attr))
            if len(r) > 60:
                r = r[:57] + "..."
            self.comments.append(("comment", "self.%s = %s" % (attr, r)))
    return linesprocessed
def getlocations(self):
    """Return the locations of this unit: a one-item list with the entity name."""
    entity = self.entity
    # Sanity check: the stored entity name must not end in an end-of-line.
    assert entity == quote.rstripeol(entity)
    return [entity]
def parse(self, propsrc):
    """Read the source of a properties file in and include them as units.

    The source is decoded with ``self.detect_encoding`` and then walked line
    by line. Comment lines are attached to the unit being built, a blank
    line flushes a unit that has content, and every other line is split at
    the delimiter reported by ``self.personality.find_delimiter`` into a
    name and a value. A value that ends in a line continuation is extended
    with the following lines until the continuation stops.

    :param propsrc: raw contents of the properties file.
    :raises IOError: if non-empty input could not be decoded.
    """
    text, encoding = self.detect_encoding(
        propsrc,
        default_encodings=[self.personality.default_encoding,
                           'utf-8', 'utf-16'])
    # Only complain when there really was input that could not be decoded;
    # empty input is not an error.
    if not text and propsrc:
        raise IOError("Cannot detect encoding for %s." %
                      (self.filename or "given string"))
    self.encoding = encoding
    propsrc = text

    newunit = propunit("", self.personality.name)
    inmultilinevalue = False
    inmultilinecomment = False

    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        if inmultilinevalue:
            # handle multiline value if we're in one
            newunit.value += line.lstrip()
            # see if there's more
            inmultilinevalue = self.personality.is_line_continuation(
                newunit.value)
            if inmultilinevalue:
                # still waiting for more: drop the continuation marker
                newunit.value = self.personality.strip_line_continuation(
                    newunit.value)
            else:
                # we're finished, add it to the list...
                newunit.value = self.personality.value_strip(newunit.value)
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        # otherwise, this could be a comment
        # FIXME handle // inline comments
        elif (inmultilinecomment or is_comment_one_line(line) or
              is_comment_start(line) or is_comment_end(line)):
            # add a comment
            if line not in self.personality.drop_comments:
                newunit.comments.append(line)
            if is_comment_start(line):
                inmultilinecomment = True
            elif is_comment_end(line):
                inmultilinecomment = False
        elif not line.strip():
            # this is a blank line: flush the pending unit if it has content
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        else:
            newunit.delimiter, delimiter_pos = \
                self.personality.find_delimiter(line)
            if delimiter_pos == -1:
                # no delimiter: the whole line is a key with an empty value
                newunit.name = self.personality.key_strip(line)
                newunit.value = u""
                newunit.delimiter = u""
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(
                    line[:delimiter_pos])
                rawvalue = line[delimiter_pos + 1:]
                firstpart = rawvalue.lstrip()
                if self.personality.is_line_continuation(firstpart):
                    # The value carries on; keep the unit open. The
                    # personality decides how the marker is removed, so no
                    # separate manual "[:-1]" strip is needed first.
                    inmultilinevalue = True
                    newunit.value = \
                        self.personality.strip_line_continuation(firstpart)
                else:
                    newunit.value = self.personality.value_strip(rawvalue)
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)

    # see if there is a leftover one...
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
def parse(self, dtdsrc):
    """Read the first DTD element from the source into this object.

    The source is walked line by line, collecting comments and at most one
    ``<!ENTITY ...>`` definition. Parsing stops as soon as the quoted
    definition of that entity has been closed.

    Parser state such as ``self.incomment``, ``self.inentity`` and
    ``self.unparsedlines`` is read here before it is assigned, so it has to
    be initialised before this method is called.

    :param dtdsrc: DTD source text; it is split on newline characters.
    :return: the number of lines consumed (0 when ``dtdsrc`` is empty).
    :raises ValueError: if the entity definition does not start with a
        single or a double quote.
    """
    self.comments = []
    # make all the lists the same: every comment category below is an alias
    # of self.comments, so all comment pairs end up in one ordered list
    self._locfilenotes = self.comments
    self._locgroupstarts = self.comments
    self._locgroupends = self.comments
    self._locnotes = self.comments
    # self._locfilenotes = []
    # self._locgroupstarts = []
    # self._locgroupends = []
    # self._locnotes = []
    # self.comments = []
    self.entity = None
    self.definition = ''
    if not dtdsrc:
        return 0
    lines = dtdsrc.split("\n")
    linesprocessed = 0
    comment = ""
    for line in lines:
        # split() removed the newline; put it back so that the whitespace
        # handling below sees complete lines
        line += "\n"
        linesprocessed += 1
        if not self.incomment:
            if (line.find('<!--') != -1):
                self.incomment = True
                self.continuecomment = False
                # now work out the type of comment, and save it (remember we're not in the comment yet)
                (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                if comment.find('LOCALIZATION NOTE') != -1:
                    # skip the spaces after the marker and classify the note
                    # by the keyword that follows it
                    l = quote.findend(comment, 'LOCALIZATION NOTE')
                    while (comment[l] == ' '):
                        l += 1
                    if comment.find('FILE', l) == l:
                        self.commenttype = "locfile"
                    elif comment.find('BEGIN', l) == l:
                        self.commenttype = "locgroupstart"
                    elif comment.find('END', l) == l:
                        self.commenttype = "locgroupend"
                    else:
                        self.commenttype = "locnote"
                else:
                    # plain comment
                    self.commenttype = "comment"
            # FIXME: the entity reference might share a line with something important
            elif not self.inentity and re.search("%.*;", line):
                # a line matching "%...;" outside of a comment and outside
                # of an entity is stored verbatim as a plain comment and
                # not parsed any further
                self.comments.append(("comment", line))
                line = ""
                continue
        if self.incomment:
            # some kind of comment
            # NOTE(review): quote.extract appears to return the extracted
            # text plus a flag telling whether the comment is still open --
            # confirm against the quote module
            (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
            self.continuecomment = self.incomment
            # strip the comment out of what will be parsed
            line = line.replace(comment, "", 1)
            # add an end of line if this is the end of the comment
            if not self.incomment:
                if line.isspace():
                    comment += line
                    line = ''
                else:
                    comment += '\n'
            # check if there's actually an entity definition that's commented out
            # TODO: parse these, store as obsolete messages
            # if comment.find('<!ENTITY') != -1:
            #     # remove the entity from the comment
            #     comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
            # depending on the type of comment (worked out at the start), put it in the right place
            # make it record the comment and type as a tuple
            commentpair = (self.commenttype, comment)
            if self.commenttype == "locfile":
                self._locfilenotes.append(commentpair)
            elif self.commenttype == "locgroupstart":
                self._locgroupstarts.append(commentpair)
            elif self.commenttype == "locgroupend":
                self._locgroupends.append(commentpair)
            elif self.commenttype == "locnote":
                self._locnotes.append(commentpair)
            elif self.commenttype == "comment":
                self.comments.append(commentpair)
        if not self.inentity and not self.incomment:
            # look for the start of an entity; anything else is kept aside
            # as an unparsed line
            entitypos = line.find('<!ENTITY')
            if entitypos != -1:
                self.inentity = True
                beforeentity = line[:entitypos].strip()
                # remember a "#..." prefix found in front of the entity
                if beforeentity.startswith("#"):
                    self.hashprefix = beforeentity
                self.entitypart = "start"
            else:
                self.unparsedlines.append(line)
        if self.inentity:
            # The entity is parsed in stages tracked by self.entitypart:
            # "start" -> "name" -> ("parameter" ->) "definition".
            # The stages are consecutive ifs (not elifs) so that several of
            # them can be completed on the same line.
            if self.entitypart == "start":
                # the entity definition
                e = quote.findend(line, '<!ENTITY')
                line = line[e:]
                self.entitypart = "name"
                self.entitytype = "internal"
            if self.entitypart == "name":
                s = 0
                e = 0
                # whitespace between "<!ENTITY" and the name
                while (e < len(line) and line[e].isspace()):
                    e += 1
                self.space_pre_entity = ' ' * (e - s)
                s = e
                self.entity = ''
                # a leading '%' marks the entity as "external"
                if (e < len(line) and line[e] == '%'):
                    self.entitytype = "external"
                    self.entityparameter = ""
                    e += 1
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                # the name runs up to the next whitespace character
                while (e < len(line) and not line[e].isspace()):
                    self.entity += line[e]
                    e += 1
                s = e
                assert quote.rstripeol(self.entity) == self.entity
                # whitespace between the name and what follows it
                while (e < len(line) and line[e].isspace()):
                    e += 1
                self.space_pre_definition = ' ' * (e - s)
                if self.entity:
                    if self.entitytype == "external":
                        self.entitypart = "parameter"
                    else:
                        self.entitypart = "definition"
                    # remember the start position and the quote character
                    if e == len(line):
                        # nothing else on this line: the next stage starts
                        # on the following line
                        self.entityhelp = None
                        e = 0
                        continue
                    elif self.entitypart == "definition":
                        self.entityhelp = (e, line[e])
                        self.instring = False
            if self.entitypart == "parameter":
                # collect the alphanumeric parameter of an external entity
                while (e < len(line) and line[e].isspace()):
                    e += 1
                paramstart = e
                while (e < len(line) and line[e].isalnum()):
                    e += 1
                self.entityparameter += line[paramstart:e]
                while (e < len(line) and line[e].isspace()):
                    e += 1
                line = line[e:]
                e = 0
                if not line:
                    continue
                # a quote character right after the parameter starts the
                # definition
                if line[0] in ('"', "'"):
                    self.entitypart = "definition"
                    self.entityhelp = (e, line[e])
                    self.instring = False
            if self.entitypart == "definition":
                if self.entityhelp is None:
                    # the definition starts on a later line than the name:
                    # find its first non-whitespace character
                    e = 0
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    if e == len(line):
                        continue
                    self.entityhelp = (e, line[e])
                    self.instring = False
                # actually the lines below should remember instring, rather than using it as dummy
                # self.entityhelp is (start offset on this line, quote character)
                e = self.entityhelp[0]
                if (self.entityhelp[1] == "'"):
                    (defpart, self.instring) = quote.extract(line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                elif (self.entityhelp[1] == '"'):
                    (defpart, self.instring) = quote.extract(line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                else:
                    raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                # for any following lines, start at the beginning of the line. remember the quote character
                self.entityhelp = (0, self.entityhelp[1])
                self.definition += defpart
                if not self.instring:
                    # the definition is complete: keep whatever follows it
                    # on this line (minus the line ending) and stop parsing
                    self.closing = line[e+len(defpart):].rstrip("\n\r")
                    self.inentity = False
                    break
    # change the condition below to a true value to debug processing: it
    # dumps every attribute of this object into the comments
    if 0:
        for attr in dir(self):
            r = repr(getattr(self, attr))
            if len(r) > 60:
                r = r[:57] + "..."
            self.comments.append(("comment", "self.%s = %s" % (attr, r)))
    return linesprocessed
def convertline(self, line):
    """Convert one line of the PHP template, returning UTF-8 encoded output.

    Comment lines, array openers/closers and lines without a usable
    delimiter are passed through. A definition line is rebuilt with the
    value looked up in ``self.inputstore.locationindex``; when the key is
    unknown the line is echoed unchanged. The ``self.inmultilinemsgid``,
    ``self.inecho`` and ``self.inarray`` flags carry state from one call to
    the next.
    """
    line = unicode(line, 'utf-8')
    returnline = ""
    opening = line.strip()[:2]

    if self.inmultilinemsgid:
        # Inside a multiline string: it ends at an unescaped quote that is
        # directly followed by the end delimiter.
        closing_at = line.rfind("%s%s" % (self.quotechar, self.enddel))
        if not (closing_at < 0 or line[closing_at - 1] == '\\'):
            self.inmultilinemsgid = False
        # Continuation lines are only repeated when we're echoing.
        if self.inecho:
            returnline = line
    elif opening in ('//', '/*'):
        # a comment line
        returnline = quote.rstripeol(line) + eol
    elif 'array(' in line:
        # Entering an array: entries use "=>" and are terminated by ",".
        self.inarray = True
        self.prename = line[:line.find('=')].strip() + "->"
        self.equaldel = "=>"
        self.enddel = ","
        returnline = quote.rstripeol(line) + eol
    elif self.inarray and ');' in line:
        # Leaving the array: back to plain "name = value;" definitions.
        self.inarray = False
        self.equaldel = "="
        self.enddel = ";"
        self.prename = ""
        returnline = quote.rstripeol(line) + eol
    else:
        line = quote.rstripeol(line)
        delim_at = line.find(self.equaldel)
        hash_at = line.find("#")
        if delim_at == -1 or 0 <= hash_at < delim_at:
            # No delimiter at all, or a '#' in front of it (assumed to be a
            # '#' comment line): just repeat the line.
            returnline = quote.rstripeol(line) + eol
        else:
            # A definition: now deal with the current string...
            key = line[:delim_at].rstrip()
            lookupkey = self.prename + key.lstrip()
            # Work out the spacing on both sides of the delimiter.
            prespace = line[len(key):delim_at]
            remainder = line[delim_at + len(self.equaldel):]
            padding = len(remainder) - len(remainder.lstrip())
            postspace = remainder[:padding]
            # The first non-blank character after the delimiter is taken
            # to be the quote character.
            self.quotechar = remainder[padding]
            closing_at = line.rfind("%s%s" % (self.quotechar, self.enddel))
            inlinecomment = ""
            if closing_at > -1:
                inlinecomment = line[closing_at + 2:]

            index = self.inputstore.locationindex
            if lookupkey in index:
                unit = index[lookupkey]
                if (unit.isfuzzy() and not self.includefuzzy) or len(unit.target) == 0:
                    value = unit.source
                else:
                    value = unit.target
                value = php.phpencode(value, self.quotechar)
                self.inecho = False
                if isinstance(value, str):
                    value = value.decode('utf8')
                parts = {
                    "key": key,
                    "pre": prespace,
                    "del": self.equaldel,
                    "post": postspace,
                    "quote": self.quotechar,
                    "value": value,
                    "enddel": self.enddel,
                    "comment": inlinecomment,
                    "eol": eol,
                }
                returnline = "%(key)s%(pre)s%(del)s%(post)s%(quote)s%(value)s%(quote)s%(enddel)s%(comment)s%(eol)s" % parts
            else:
                self.inecho = True
                returnline = line + eol

            # No string termination (or an escaped closing quote) means the
            # string carries on to the next line.
            if closing_at == -1 or line[closing_at - 1] == '\\':
                self.inmultilinemsgid = True

    if isinstance(returnline, unicode):
        returnline = returnline.encode('utf-8')
    return returnline
def parse(self, propsrc):
    """Read the source of a properties file in and include them as units.

    The source is decoded with ``self.detect_encoding`` and then walked line
    by line. Comment lines are attached to the unit being built, a blank
    line flushes a unit that has content, and every other line is split at
    the delimiter reported by ``self.personality.find_delimiter`` into a
    name and a value. A value ending in a line continuation is extended
    with the following lines until the continuation stops.

    :param propsrc: raw contents of the properties file.
    :raises IOError: if non-empty input could not be decoded.
    """
    text, encoding = self.detect_encoding(
        propsrc,
        default_encodings=[
            self.personality.default_encoding, 'utf-8', 'utf-16'
        ])
    # Empty input is not an encoding failure: only raise when there was
    # something to decode. Without a filename, say so instead of "None".
    if not text and propsrc:
        raise IOError("Cannot detect encoding for %s" %
                      (self.filename or "given string"))
    self.encoding = encoding
    propsrc = text

    newunit = propunit("", self.personality.name)
    inmultilinevalue = False
    inmultilinecomment = False

    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        if inmultilinevalue:
            # handle multiline value if we're in one
            newunit.value += line.lstrip()
            # see if there's more
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # still waiting for more: strip the backslash
                newunit.value = newunit.value[:-1]
            else:
                # we're finished, add it to the list...
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        # otherwise, this could be a comment
        # FIXME handle // inline comments
        elif (inmultilinecomment or is_comment_one_line(line) or
              is_comment_start(line) or is_comment_end(line)):
            # add a comment
            if line not in self.personality.drop_comments:
                newunit.comments.append(line)
            if is_comment_start(line):
                inmultilinecomment = True
            elif is_comment_end(line):
                inmultilinecomment = False
        elif not line.strip():
            # this is a blank line: flush the pending unit if it has content
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        else:
            newunit.delimiter, delimiter_pos = \
                self.personality.find_delimiter(line)
            if delimiter_pos == -1:
                # no delimiter: the whole line is a key with an empty value
                newunit.name = self.personality.key_strip(line)
                newunit.value = u""
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(
                    line[:delimiter_pos])
                rawvalue = line[delimiter_pos + 1:]
                firstpart = rawvalue.lstrip()
                if is_line_continuation(firstpart):
                    # The value carries on: keep the unit open and drop the
                    # trailing backslash.
                    inmultilinevalue = True
                    newunit.value = firstpart[:-1]
                else:
                    newunit.value = self.personality.value_strip(rawvalue)
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)

    # see if there is a leftover one...
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)