def format_suggestions(self, id, response):
    """Turn an Open-Tran response into tmserver-style match dictionaries.

    The response is decoded with ``self._loads_safe``; a falsy result yields
    an empty list. Each kept match has the keys ``target``, ``tmsource``,
    ``source`` and ``quality``. The returned list is sorted by descending
    quality and truncated to ``self.max_candidates`` entries.
    """
    suggestions = self._loads_safe(response)
    if not suggestions:
        return []

    id = data.forceunicode(id)
    self.last_suggestions = suggestions

    candidates = []
    for suggestion in suggestions:
        projects = suggestion['projects']
        # Fuzziness is signalled by the 'flags' member: keep the suggestion
        # only if at least one project reports flags == 0.
        if not any(project['flags'] == 0 for project in projects):
            continue

        match = {
            'target': data.forceunicode(suggestion['text']),
            'tmsource': projects[0]['name'],
            'source': data.forceunicode(projects[0]['orig_phrase']),
        }
        # Open-Tran often returns many results that are not really suitable
        # as translation memory, so score them and drop the weak ones.
        match['quality'] = self.comparer.similarity(
            id, match['source'], self.min_similarity)
        if match['quality'] >= self.min_similarity:
            candidates.append(match)

    ranked = sorted(candidates, key=lambda match: match['quality'], reverse=True)
    return ranked[:self.max_candidates]
def get_xml_text_value(self, xmltarget):
    """Return the unescaped text of ``xmltarget`` including child markup.

    An element without children is unescaped as plain text. Otherwise a deep
    copy is unescaped node by node and serialised: the root text first, then
    every direct child.
    """
    if len(xmltarget) == 0:
        # There are no html markups, so unescape it as plain text.
        return self.unescape(xmltarget.text)

    # There are html markups: work on a clone so the original tree is untouched.
    clone = copy.deepcopy(xmltarget)

    # Unescape the root text and every descendant's text and tail.
    if clone.text is not None:
        clone.text = self.unescape(clone.text)
    for node in clone.iterdescendants():
        if node.text is not None:
            node.text = self.unescape(node.text)
        if node.tail is not None:
            node.tail = self.unescape(node.tail)

    if clone.text is None:
        target = u''
    else:
        # Serialise the root text through a temporary <t> element so it gets
        # XML-escaped; the [3:-4] slice trims the wrapper tags again.
        wrapper = etree.Element('t')
        wrapper.text = clone.text
        target = data.forceunicode(
            etree.tostring(wrapper, encoding='utf-8')[3:-4])

    # Include markup as well
    child_markup = [
        data.forceunicode(etree.tostring(child, encoding='utf-8'))
        for child in clone.iterchildren()
    ]
    target += u''.join(child_markup)
    return target
def settarget(self, text):
    """Replace the content of the target node with ``text``.

    ``text`` may be a ``multistring``, a list or a single string. This is a
    fairly destructive implementation: the target node is cleared (only its
    "type" attribute is restored) and rebuilt. Do not assume it is correct in
    all regards; plurals are hard to deal with.
    """
    # Reinitialise the rich target, and do nothing for an identical string.
    self._rich_target = None
    if self.gettarget() == text:
        return

    if isinstance(text, multistring):
        strings = text.strings
    elif isinstance(text, list):
        strings = text
    else:
        strings = [text]

    targetnode = self._gettargetnode()
    target_type = targetnode.get("type")
    targetnode.clear()
    if target_type:
        targetnode.set("type", target_type)

    if not (self.hasplural() or len(strings) > 1):
        targetnode.text = data.forceunicode(text) or ""
        targetnode.tail = "\n "
        return

    self.xmlelement.set("numerus", "yes")
    for string in strings:
        numerus = etree.SubElement(targetnode, self.namespaced("numerusform"))
        numerus.text = data.forceunicode(string) or ""
        # manual, nasty pretty printing. See bug 1420.
        numerus.tail = "\n "
def target(self):
    """Return the target text, or a ``multistring`` for plural units.

    When there is no target node yet, an empty "translation" sub-element is
    created on ``self.xmlelement`` and ``None`` is returned.
    """
    node = self._gettargetnode()
    if node is None:
        etree.SubElement(self.xmlelement, self.namespaced("translation"))
        return None

    if not self.hasplural():
        return data.forceunicode(node.text) or u""

    forms = node.findall(self.namespaced("numerusform"))
    return multistring([data.forceunicode(form.text) or u"" for form in forms])
def fixup(source, response):
    """Clean up ``response`` so that it lines up with ``source``.

    Runs the autocorrect filter, turns ``__::__`` placeholders back into
    newlines, removes the space after an opening parenthesis and swaps every
    replacement listed in ``punc_tuples`` back to its original character.
    """
    source = data.forceunicode(source)
    response = data.forceunicode(response)

    from translate.filters.autocorrect import correct
    # Keep the original response when autocorrect has nothing to offer.
    response = correct(source, response) or response

    response = response.replace(u" __::__ ", "\n")
    # and again for the sake of \n\n:
    response = response.replace(u"__::__ ", "\n")
    response = response.replace(u"( ", u"(")
    for original_char, replacement in punc_tuples:
        response = response.replace(replacement, original_char)
    return response
def settarget(self, text, lang='xx', append=False):
    """Sets the "target" string (second language), or alternatively appends
    to the list.

    :param text: new target text; ``None`` clears the target DOM node.
    :param lang: language code passed to ``createlanguageNode``.
    :param append: forwarded to ``set_target_dom`` when a node is stored.
    """
    # XXX: we really need the language - can't really be optional, and we
    # need to propagate it
    if self._rich_target is not None:
        self._rich_target = None
    text = data.forceunicode(text)

    # Firstly deal with reinitialising to None or setting to identical string
    if self.gettarget() == text:
        return

    languageNode = self.get_target_dom(None)
    if text is None:
        self.set_target_dom(None, False)
        return

    if languageNode is not None:
        # Replace the whole existing node so that all content, including tags
        # in the target node, is updated. This was only tested for iws files,
        # so it needs more checks and possibly a better update method later.
        self.xmlelement.remove(languageNode)

    languageNode = self.createlanguageNode(lang, text, "target")
    self.set_target_dom(languageNode, append)
def settarget(self, text, lang='xx', append=False):
    """Sets the "target" string (second language), or alternatively appends
    to the list.

    :param text: new target text; nothing is changed when it is ``None``.
    :param lang: language code passed to ``createlanguageNode``.
    :param append: forwarded to ``set_target_dom`` when a new node is created.
    """
    # XXX: we really need the language - can't really be optional, and we
    # need to propagate it
    if self._rich_target is not None:
        self._rich_target = None
    text = data.forceunicode(text)

    # Firstly deal with reinitialising to None or setting to identical string
    if self.gettarget() == text:
        return

    languageNode = self.get_target_dom(None)
    if text is None:
        return

    if languageNode is None:
        languageNode = self.createlanguageNode(lang, text, "target")
        self.set_target_dom(languageNode, append)
        return

    if self.textNode:
        # Write into the first text node if one exists, otherwise into the
        # language node itself. The builtin next() with a default replaces
        # the Python-2-only ``terms.next()`` / ``except StopIteration, e``.
        terms = languageNode.iter(self.namespaced(self.textNode))
        languageNode = next(terms, languageNode)
    languageNode.text = text
def getsource(self):
    """Return the source node text; plural units get a one-item multistring."""
    # TODO: support <byte>. See bug 528.
    node = self._getsourcenode()
    if not self.hasplural():
        return data.forceunicode(node.text)
    return multistring([node.text])
def addnote(self, text, origin=None, position="append"):
    """Add a comment to the unit; modeled on the XLIFF method.

    See :meth:`translate.storage.xliff.xliffunit.addnote`

    Notes with a programmer origin are stored in ``automaticcomments`` with a
    "#. " prefix; all others go to ``othercomments`` with a "# " prefix. Any
    position other than "append" or "prepend" replaces the existing comments.
    """
    # Nothing to do for empty or whitespace-only notes.
    if not (text and text.strip()):
        return

    text = data.forceunicode(text)
    existing = self.othercomments
    prefix = "# "
    is_automatic = origin in ["programmer", "developer", "source code"]
    if is_automatic:
        existing = self.automaticcomments
        prefix = "#. "

    combined = [prefix + line + "\n" for line in text.split("\n")]
    if position == "append":
        combined = existing + combined
    elif position == "prepend":
        combined = combined + existing

    if is_automatic:
        self.automaticcomments = combined
    else:
        self.othercomments = combined
def target(self):
    """Return the element's text value, or a multistring for "plurals"."""
    element = self.xmlelement
    if element.tag == "plurals":
        forms = []
        for entry in element.iterchildren():
            forms.append(data.forceunicode(self.get_xml_text_value(entry)))
        return multistring(forms)
    return self.get_xml_text_value(element)
def setsource(self, source):
    """Store the encoded source in ``self.value`` and reset the rich source.

    The "mozilla" and "skype" personalities use the Mozilla properties
    encoding; every other personality uses the Java properties encoding.
    """
    self._rich_source = None
    source = data.forceunicode(source)
    if self.personality in ("mozilla", "skype"):
        encode = quote.mozillapropertiesencode
    else:
        encode = quote.javapropertiesencode
    self.value = encode(source or u"")
def settarget(self, target):
    """Store the encoded target in ``self.translation`` and reset the rich target.

    The "mozilla" and "skype" personalities use the Mozilla properties
    encoding; every other personality uses the Java properties encoding.
    """
    self._rich_target = None
    target = data.forceunicode(target)
    if self.personality in ("mozilla", "skype"):
        encode = quote.mozillapropertiesencode
    else:
        encode = quote.javapropertiesencode
    self.translation = encode(target or u"")
def addnote(self, text, origin=None, position="append"):
    """Add a comment to the unit; modeled on the XLIFF method.

    See :meth:`translate.storage.xliff.xliffunit.addnote`

    Each line is prefixed with "#" (or "#." for programmer origins), followed
    by a space only when the line is non-empty. Any position other than
    "append" or "prepend" replaces the existing comments.
    """
    # Nothing to do for empty or whitespace-only notes.
    if not (text and text.strip()):
        return

    text = data.forceunicode(text)
    existing = self.othercomments
    prefix = "#"
    is_automatic = origin in ["programmer", "developer", "source code"]
    if is_automatic:
        existing = self.automaticcomments
        prefix = "#."

    combined = []
    for line in text.split("\n"):
        separator = " " if line else ""
        combined.append("".join((prefix, separator, line, "\n")))

    if position == "append":
        combined = existing + combined
    elif position == "prepend":
        combined = combined + existing

    if is_automatic:
        self.automaticcomments = combined
    else:
        self.othercomments = combined
def addnote(self, text, origin=None, position="append"):
    """Append programmer or origin-less notes to ``self.comments``.

    Notes with any other origin are delegated to the parent class.
    """
    if origin not in ['programmer', 'developer', 'source code', None]:
        return super(propunit, self).addnote(text, origin=origin,
                                             position=position)
    self.comments.append(data.forceunicode(text))
def setsource(self, source):
    """Store the encoded source in ``self.value``.

    The "mozilla" personality uses the Mozilla properties encoding; every
    other personality uses the Java properties encoding.
    """
    source = data.forceunicode(source)
    if self.personality != "mozilla":
        self.value = quote.javapropertiesencode(source or u"")
    else:
        self.value = quote.mozillapropertiesencode(source or u"")
def gettarget(self, lang=None):
    """Return the unescaped inner text followed by serialised child markup."""
    element = self.xmlelement
    # Grab inner text
    inner_text = self.unescape(element.text or u"")
    # Include markup as well
    markup = [
        data.forceunicode(etree.tostring(child, encoding="utf-8"))
        for child in element.iterchildren()
    ]
    return inner_text + u"".join(markup)
def gettarget(self, lang=None):
    """Return the element's text value, or a multistring for "plurals"."""
    if self.xmlelement.tag != "plurals":
        return self.get_xml_text_value(self.xmlelement)
    return multistring([
        data.forceunicode(self.get_xml_text_value(entry))
        for entry in self.xmlelement.iterchildren()
    ])
def gettarget(self, lang=None):
    """Return the target; plural units combine the targets of ``self.units``.

    A plural unit without sub-units yields ``None``. Non-plural units defer
    to the parent implementation.
    """
    if not self.hasplural():
        return super().gettarget(lang)
    plural_targets = [data.forceunicode(unit.target) for unit in self.units]
    return multistring(plural_targets) if plural_targets else None
def target(self, target):
    """Set the target node text, clearing the node's previous content."""
    # Reset the rich target first; an identical string needs no further work.
    self._rich_target = None
    if self.target == target:
        return
    node = self._gettargetnode()
    node.clear()
    node.text = data.forceunicode(target) or u""
def get_xml_text_value(self, xmltarget):
    """Return the unescaped inner text of ``xmltarget`` plus its child markup."""
    # Grab inner text
    inner_text = self.unescape(xmltarget.text or u'')
    # Include markup as well
    markup = u''.join([
        data.forceunicode(etree.tostring(child, encoding='utf-8'))
        for child in xmltarget.iterchildren()
    ])
    return inner_text + markup
def gettarget(self, lang=None):
    """Return the inner text and child markup, unescaped as one string."""
    element = self.xmlelement
    # Inner text first, then the serialised markup of every child.
    pieces = element.text or u''
    pieces += u''.join([
        data.forceunicode(etree.tostring(child, encoding='utf-8'))
        for child in element.iterchildren()
    ])
    return self.unescape(pieces)
def gettarget(self):
    """Return the target; plural units combine the targets of ``self.units``.

    A plural unit without sub-units yields ``None``. Non-plural units defer
    to the parent implementation.
    """
    if not self.hasplural():
        return super(PoXliffUnit, self).gettarget()
    plural_targets = [data.forceunicode(unit.target) for unit in self.units]
    return multistring(plural_targets) if plural_targets else None
def select_match(self, match_data):
    """Handle a match-selection event.

    Pushes the focused target's text box onto the undo controller, then sets
    that target to the match's 'target' text.
    (This method is used as View-Controller communications)
    """
    unit_controller = self.main_controller.unit_controller
    view = unit_controller.view
    target_n = view.focused_target_n
    # The previous text is fetched but not used afterwards; the call is kept
    # as it was.
    old_text = view.get_target_n(target_n)
    self.main_controller.undo_controller.push_current_text(view.targets[target_n])
    unit_controller.set_unit_target(target_n, forceunicode(match_data['target']))
def _on_insert_text(self, buffer, iter, ins_text, length):
    """Mirror a text insertion in the buffer into ``self.elem``.

    Three insertion strategies are tried in order until one succeeds:
    ``insert_between`` the neighbouring elements, appending to a leaf element
    whose closing widget sits at the cursor, and a plain ``insert`` at the
    tree index for the buffer offset. On success the element tree is pruned,
    ``self.refresh_cursor_pos`` is advanced by the inserted length and the
    'text-inserted' signal is emitted.

    :param buffer: not used by this method.
    :param iter: text iterator at the insertion point.
    :param ins_text: the inserted text; only the first ``length`` items are used.
    :param length: length of the inserted text.
    """
    if self.elem is None:
        return
    ins_text = data.forceunicode(ins_text[:length])
    buff_offset = iter.get_offset()
    gui_info = self.elem.gui_info
    # Elements just before and at the insertion offset.
    left = gui_info.elem_at_offset(buff_offset - 1)
    right = gui_info.elem_at_offset(buff_offset)
    #logging.debug('"%s[[%s]]%s" | elem=%s[%d] | left=%s right=%s' % (
    #    buffer.get_text(buffer.get_start_iter(), iter),
    #    ins_text,
    #    buffer.get_text(iter, buffer.get_end_iter()),
    #    repr(self.elem), buff_offset,
    #    repr(left), repr(right)
    #))
    succeeded = False
    # NOTE(review): ``unicode`` is a Python 2 builtin - confirm the target
    # interpreter before porting.
    if not (left is None and right is None) and (left is not right or not unicode(left)):
        succeeded = self.elem.insert_between(left, right, ins_text)
        #logging.debug('self.elem.insert_between(%s, %s, "%s"): %s' % (repr(left), repr(right), ins_text, succeeded))
    if not succeeded and left is not None and left is right and left.isleaf( ):
        # This block handles the special case where the cursor is just
        # inside a leaf element with a closing widget. In this case both
        # left and right will point to the element in question, but it
        # need not be empty to be a leaf. Because the cursor is still
        # "inside" the element, we want to append to this leaf instead
        # of after it, which is what StringElem.insert() will do, seeing
        # as the position before and after the widget is the same in
        # the context of StringElem.
        anchor = iter.get_child_anchor()
        if anchor:
            widgets = anchor.get_widgets()
            left_widgets = left.gui_info.widgets
            if len(widgets) > 0 and len(left_widgets) > 1 and \
                    widgets[0] is left_widgets[1] and \
                    iter.get_offset() == self.elem.gui_info.length() - 1:
                succeeded = left.insert(len(left), ins_text)
                #logging.debug('%s.insert(len(%s), "%s")' % (repr(left), repr(left), ins_text))
    if not succeeded:
        # Last resort: insert at the tree index matching the buffer offset.
        offset = gui_info.gui_to_tree_index(buff_offset)
        succeeded = self.elem.insert(offset, ins_text)
        #logging.debug('self.elem.insert(%d, "%s"): %s' % (offset, ins_text, succeeded))
    if succeeded:
        self.elem.prune()
        cursor_pos = self.refresh_cursor_pos
        # A negative refresh position falls back to the buffer's cursor position.
        if cursor_pos < 0:
            cursor_pos = self.buffer.props.cursor_position
        cursor_pos += len(ins_text)
        self.refresh_cursor_pos = cursor_pos
        #logging.debug('text-inserted: %s@%d of %s' % (ins_text, iter.get_offset(), repr(self.elem)))
        self.emit('text-inserted', ins_text, buff_offset, self.elem)
def source(self, source):
    """Store ``source`` in ``self._source`` and reset the rich source.

    ``multistring`` and text values are stored as they are; anything else
    (e.g. a list or dict) is wrapped in a ``multistring``.
    """
    self._rich_source = None
    source = data.forceunicode(source or u"") or u""
    if isinstance(source, (multistring, six.text_type)):
        self._source = source
    else:
        # Not a text type: a list or dict.
        self._source = multistring(source)
def gettarget(self):
    """Return the target text, or a ``multistring`` for plural units.

    When there is no target node yet, an empty "translation" sub-element is
    created on ``self.xmlelement`` and ``None`` is returned.
    """
    node = self._gettargetnode()
    if node is None:
        etree.SubElement(self.xmlelement, self.namespaced("translation"))
        return None

    if not self.hasplural():
        return data.forceunicode(node.text) or u""

    forms = node.findall(self.namespaced("numerusform"))
    return multistring([form.text or u"" for form in forms])
def addlocation(self, location):
    """Add a location to sourcecomments in the PO unit

    :param location: Text location e.g. 'file.c:23' does not include #:
    :type location: String
    """
    quoted = pocommon.quote_plus(data.forceunicode(location))
    self.sourcecomments.append("#: %s\n" % quoted)
def _on_insert_text(self, buffer, iter, ins_text, length):
    """Mirror a text insertion in the buffer into ``self.elem``.

    Three insertion strategies are tried in order until one succeeds:
    ``insert_between`` the neighbouring elements, appending to a leaf element
    whose closing widget sits at the cursor, and a plain ``insert`` at the
    tree index for the buffer offset. On success the element tree is pruned,
    ``self.refresh_cursor_pos`` is advanced by the inserted length and the
    'text-inserted' signal is emitted.

    :param buffer: not used by this method.
    :param iter: text iterator at the insertion point.
    :param ins_text: the inserted text; only the first ``length`` items are used.
    :param length: length of the inserted text.
    """
    if self.elem is None:
        return
    ins_text = data.forceunicode(ins_text[:length])
    buff_offset = iter.get_offset()
    gui_info = self.elem.gui_info
    # Elements just before and at the insertion offset.
    left = gui_info.elem_at_offset(buff_offset-1)
    right = gui_info.elem_at_offset(buff_offset)
    #logging.debug('"%s[[%s]]%s" | elem=%s[%d] | left=%s right=%s' % (
    #    buffer.get_text(buffer.get_start_iter(), iter),
    #    ins_text,
    #    buffer.get_text(iter, buffer.get_end_iter()),
    #    repr(self.elem), buff_offset,
    #    repr(left), repr(right)
    #))
    succeeded = False
    # NOTE(review): ``unicode`` is a Python 2 builtin - confirm the target
    # interpreter before porting.
    if not (left is None and right is None) and (left is not right or not unicode(left)):
        succeeded = self.elem.insert_between(left, right, ins_text)
        #logging.debug('self.elem.insert_between(%s, %s, "%s"): %s' % (repr(left), repr(right), ins_text, succeeded))
    if not succeeded and left is not None and left is right and left.isleaf():
        # This block handles the special case where the cursor is just
        # inside a leaf element with a closing widget. In this case both
        # left and right will point to the element in question, but it
        # need not be empty to be a leaf. Because the cursor is still
        # "inside" the element, we want to append to this leaf instead
        # of after it, which is what StringElem.insert() will do, seeing
        # as the position before and after the widget is the same in
        # the context of StringElem.
        anchor = iter.get_child_anchor()
        if anchor:
            widgets = anchor.get_widgets()
            left_widgets = left.gui_info.widgets
            if len(widgets) > 0 and len(left_widgets) > 1 and \
                    widgets[0] is left_widgets[1] and \
                    iter.get_offset() == self.elem.gui_info.length() - 1:
                succeeded = left.insert(len(left), ins_text)
                #logging.debug('%s.insert(len(%s), "%s")' % (repr(left), repr(left), ins_text))
    if not succeeded:
        # Last resort: insert at the tree index matching the buffer offset.
        offset = gui_info.gui_to_tree_index(buff_offset)
        succeeded = self.elem.insert(offset, ins_text)
        #logging.debug('self.elem.insert(%d, "%s"): %s' % (offset, ins_text, succeeded))
    if succeeded:
        self.elem.prune()
        cursor_pos = self.refresh_cursor_pos
        # A negative refresh position falls back to the buffer's cursor position.
        if cursor_pos < 0:
            cursor_pos = self.buffer.props.cursor_position
        cursor_pos += len(ins_text)
        self.refresh_cursor_pos = cursor_pos
        #logging.debug('text-inserted: %s@%d of %s' % (ins_text, iter.get_offset(), repr(self.elem)))
        self.emit('text-inserted', ins_text, buff_offset, self.elem)
def get_text(self, start_iter=None, end_iter=None):
    """Return the text rendered in this text box.

    Integer arguments are treated as buffer offsets and converted to
    iterators; ``None`` means the start or end of the buffer respectively.
    Uses C{gtk.TextBuffer.get_text()}.
    """
    text_buffer = self.buffer
    # Convert integer offsets to iterators first.
    if isinstance(start_iter, int):
        start_iter = text_buffer.get_iter_at_offset(start_iter)
    if isinstance(end_iter, int):
        end_iter = text_buffer.get_iter_at_offset(end_iter)
    # Then fill in missing bounds with the buffer's own limits.
    if start_iter is None:
        start_iter = text_buffer.get_start_iter()
    if end_iter is None:
        end_iter = text_buffer.get_end_iter()
    rendered = text_buffer.get_text(start_iter, end_iter)
    return data.forceunicode(rendered)
def setsource(self, source):
    """Store ``source`` in ``self._source`` and reset the rich source.

    ``multistring`` and ``unicode`` values are stored as they are; anything
    else (e.g. a list or dict) is wrapped in a ``multistring``.
    """
    self._rich_source = None
    source = data.forceunicode(source or u"") or u""
    if isinstance(source, (multistring, unicode)):
        self._source = source
    else:
        # Not a text type: a list or dict.
        self._source = multistring(source)
def target(self, target):
    """Set the target node text, clearing the node's previous content."""
    # Reset the rich target first; an identical string needs no further work.
    self._rich_target = None
    if self.target == target:
        return
    node = self._gettargetnode()
    node.clear()
    node.text = data.forceunicode(target) or u""
    # Assume no <comment> follows; allow the </data> element
    # to be indented with 2 spaces (same level as the opening
    # <data> element before <value>)
    node.tail = u"\n "
def settarget(self, text, lang='xx', append=False):
    """Set the target node text, clearing the node's previous content.

    :param text: new target text; a falsy converted value is stored as u"".
    :param lang: not used by this implementation.
    :param append: not used by this implementation.
    """
    # Firstly deal with reinitialising to None or setting to identical
    # string.
    self._rich_target = None
    if self.gettarget() == text:
        return
    # The former ``strings`` list was built here but never read, so it is gone.
    targetnode = self._gettargetnode()
    targetnode.clear()
    targetnode.text = data.forceunicode(text) or u""
    targetnode.tail = u"\n "
def accept_response(self, tmmodel, query_str, matches):
    """Accept a query-response from the model.

    Responses are ignored when the store is closed, when the query is no
    longer the current one, or when there are no matches. New matches are
    merged into ``self.matches`` (de-duplicated on their normalized target),
    sorted by descending quality, truncated to ``self.max_matches`` and
    handed to the view.
    (This method is used as Model-Controller communications)
    """
    if not self.storecursor:
        # File closed since the query was started
        return

    query_str = forceunicode(query_str)
    if query_str != self.current_query or not matches:
        return

    # Perform some sanity checks on matches first
    for match in matches:
        if not isinstance(match.get('quality', 0), int):
            match['quality'] = int(match['quality'] or 0)
        if match.get('tmsource') is None:
            match['tmsource'] = tmmodel.display_name
        match['query_str'] = query_str

    anything_new = False
    for match in matches:
        norm_target = normalize(match['target'])
        duplicates = [
            known for known in self.matches
            if normalize(known['target']) == norm_target
        ]
        if not duplicates:
            # Let's insert at the end to prioritise existing matches over
            # new ones. We rely on the guarantee of sort stability. This
            # way an existing 100% will be above a new 100%.
            self.matches.append(match)
            anything_new = True
            continue

        prevmatch = duplicates[0]
        if not prevmatch.get('quality'):
            # Matches without quality are assumed to be less appropriate
            # (ie. MT matches) than matches with an associated quality.
            self.matches.remove(prevmatch)
            self.matches.append(match)
            anything_new = True

    if not anything_new:
        return

    self.matches.sort(key=lambda known: known.get('quality') or 0, reverse=True)
    self.matches = self.matches[:self.max_matches]

    # Only call display_matches if necessary:
    if self.matches:
        self.view.display_matches(self.matches)
def settarget(self, text, lang='xx', append=False):
    """Set the target node text, clearing the node's previous content.

    :param text: new target text; a falsy converted value is stored as u"".
    :param lang: not used by this implementation.
    :param append: not used by this implementation.
    """
    # Firstly deal with reinitialising to None or setting to identical
    # string.
    self._rich_target = None
    if self.target == text:
        return
    # The former ``strings`` list was built here but never read, so it is gone.
    targetnode = self._gettargetnode()
    targetnode.clear()
    targetnode.text = data.forceunicode(text) or u""
    # Assume no <comment> follows; allow the </data> element
    # to be indented with 2 spaces (same level as the opening
    # <data> element before <value>)
    targetnode.tail = u"\n "
def accept_response(self, tmmodel, query_str, matches):
    """Accept a query-response from the model.

    Responses are ignored when the store is closed, when the query is no
    longer the current one, or when there are no matches. New matches are
    merged into ``self.matches`` (de-duplicated on their normalized target),
    sorted by descending quality, truncated to ``self.max_matches`` and
    handed to the view.
    (This method is used as Model-Controller communications)
    """
    if not self.storecursor:
        # File closed since the query was started
        return

    query_str = forceunicode(query_str)
    if query_str != self.current_query or not matches:
        return

    # Perform some sanity checks on matches first
    for match in matches:
        if not isinstance(match.get('quality', 0), int):
            match['quality'] = int(match['quality'] or 0)
        if match.get('tmsource') is None:
            match['tmsource'] = tmmodel.display_name
        match['query_str'] = query_str

    anything_new = False
    for match in matches:
        norm_target = normalize(match['target'])
        duplicates = [
            known for known in self.matches
            if normalize(known['target']) == norm_target
        ]
        if not duplicates:
            # Let's insert at the end to prioritise existing matches over
            # new ones. We rely on the guarantee of sort stability. This
            # way an existing 100% will be above a new 100%.
            self.matches.append(match)
            anything_new = True
            continue

        prevmatch = duplicates[0]
        if not prevmatch.get('quality'):
            # Matches without quality are assumed to be less appropriate
            # (ie. MT matches) than matches with an associated quality.
            self.matches.remove(prevmatch)
            self.matches.append(match)
            anything_new = True

    if not anything_new:
        return

    self.matches.sort(key=lambda known: known.get('quality') or 0, reverse=True)
    self.matches = self.matches[:self.max_matches]

    # Only call display_matches if necessary:
    if self.matches:
        self.view.display_matches(self.matches)
def addnote(self, text, origin=None, position="append"):
    """Add a comment to the unit; modeled on the XLIFF method.

    See xliff.py::xliffunit.addnote

    :param text: note text; empty or whitespace-only text is ignored. A
        single trailing newline is dropped before the text is split on
        newlines.
    :param origin: "programmer", "developer" or "source code" selects
        ``automaticcomments``; any other value selects ``othercomments``.
    :param position: "append" extends the selected list in place; any other
        value stores a new list with the new lines first.
    """
    # ignore empty strings and strings without non-space characters
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)

    # Always bind the flag: it used to be set only for programmer origins,
    # which raised UnboundLocalError when prepending a translator note.
    autocomments = origin in ["programmer", "developer", "source code"]
    commentlist = self.automaticcomments if autocomments else self.othercomments

    if text.endswith(u'\n'):
        text = text[:-1]
    lines = text.split(u"\n")

    if position == "append":
        commentlist.extend(lines)
        return

    newcomments = lines + commentlist
    if autocomments:
        self.automaticcomments = newcomments
    else:
        self.othercomments = newcomments
def addnote(self, text, origin=None, position="append"):
    """Add a comment to the unit; modeled on the XLIFF method.

    See xliff.py::xliffunit.addnote

    :param text: note text; empty or whitespace-only text is ignored. Each
        line is stored with a "# " prefix ("#. " for programmer origins) and
        a trailing newline.
    :param origin: "programmer", "developer" or "source code" selects
        ``automaticcomments``; any other value selects ``othercomments``.
    :param position: "append" extends the selected list in place; any other
        value stores a new list with the new lines first.
    """
    # ignore empty strings and strings without non-space characters
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)

    # Always bind the flag: it used to be set only for programmer origins,
    # which raised UnboundLocalError when prepending a translator note.
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "

    formatted = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        commentlist += formatted
        return

    newcomments = formatted + list(commentlist)
    if autocomments:
        self.automaticcomments = newcomments
    else:
        self.othercomments = newcomments
def addnote(self, text, origin=None, position="append"):
    """Add a note to the underlying gettext message.

    ``position`` may be "append", "merge" (only lines not yet present, or
    shorter than five characters, are added) or anything else, which places
    the new text before the existing notes. Programmer origins are stored as
    extracted comments, all others as regular comments.
    """
    # Nothing to do for empty or whitespace-only notes.
    if not (text and text.strip()):
        return

    text = data.forceunicode(text)
    oldnotes = self.getnotes(origin)
    newnotes = None

    if not oldnotes:
        newnotes = "\n".join(
            [line.rstrip("\r") for line in text.split("\n")])
    elif position == "append":
        newnotes = oldnotes + "\n" + text
    elif position == "merge":
        if oldnotes != text:
            merged = oldnotes.split("\n")
            for candidate in text.split("\n"):
                candidate = candidate.rstrip("\r")
                # avoid duplicate comment lines (this might cause some problems)
                if candidate not in oldnotes or len(candidate) < 5:
                    merged.append(candidate)
            newnotes = "\n".join(merged)
    else:
        newnotes = text + '\n' + oldnotes

    if not newnotes:
        return

    # libgettextpo older than 0.17.0 gets a leading space on non-empty lines.
    needs_space = get_libgettextpo_version() < (0, 17, 0)
    padded = [
        " " + line if (line and needs_space) else line
        for line in newnotes.split("\n")
    ]
    newnotes = "\n".join(padded).encode(self._encoding)

    if origin in ["programmer", "developer", "source code"]:
        store_comments = gpo.po_message_set_extracted_comments
    else:
        store_comments = gpo.po_message_set_comments
    store_comments(self._gpo_message, newnotes)
def addnote(self, text, origin=None, position="append"):
    """Add a note to the underlying gettext message.

    ``position`` may be "append", "merge" (only lines not yet present, or
    shorter than five characters, are added) or anything else, which places
    the new text before the existing notes. Programmer origins are stored as
    extracted comments, all others as regular comments.
    """
    # Nothing to do for empty or whitespace-only notes.
    if not (text and text.strip()):
        return

    text = data.forceunicode(text)
    oldnotes = self.getnotes(origin)
    newnotes = None

    if not oldnotes:
        newnotes = "\n".join([line.rstrip("\r") for line in text.split("\n")])
    elif position == "append":
        newnotes = oldnotes + "\n" + text
    elif position == "merge":
        if oldnotes != text:
            merged = oldnotes.split("\n")
            for candidate in text.split("\n"):
                candidate = candidate.rstrip("\r")
                # avoid duplicate comment lines (this might cause some problems)
                if candidate not in oldnotes or len(candidate) < 5:
                    merged.append(candidate)
            newnotes = "\n".join(merged)
    else:
        newnotes = text + '\n' + oldnotes

    if not newnotes:
        return

    # libgettextpo older than 0.17.0 gets a leading space on non-empty lines.
    needs_space = get_libgettextpo_version() < (0, 17, 0)
    padded = [
        " " + line if (line and needs_space) else line
        for line in newnotes.split("\n")
    ]
    newnotes = "\n".join(padded).encode(self.CPO_ENC)

    if origin in ["programmer", "developer", "source code"]:
        store_comments = gpo.po_message_set_extracted_comments
    else:
        store_comments = gpo.po_message_set_comments
    store_comments(self._gpo_message, newnotes)
def setsource(self, text, sourcelang='en'):
    """Replace ``self.source_dom`` with a new "source" language node for ``text``."""
    # Drop any cached rich source.
    if self._rich_source is not None:
        self._rich_source = None
    unicode_text = data.forceunicode(text)
    self.source_dom = self.createlanguageNode(sourcelang, unicode_text, "source")
def setcontext(self, context):
    """Store the context in ``self._msgctxt``; a falsy context becomes u""."""
    self._msgctxt = data.forceunicode(context or u"")
def settarget(self, target):
    """Encode ``target`` with the personality and store it as the translation."""
    self._rich_target = None
    unicode_target = data.forceunicode(target)
    self.translation = self.personality.encode(unicode_target or u"",
                                               self.encoding)
def setsource(self, source):
    """Encode ``source`` with the personality and store it in ``self.value``."""
    self._rich_source = None
    unicode_source = data.forceunicode(source)
    self.value = self.personality.encode(unicode_source or u"", self.encoding)
def target(self, target):
    """Encode ``target`` as the translation and record whether it is empty.

    ``self.explicitely_missing`` is set to True when the converted target is
    falsy.
    """
    self._rich_target = None
    target = data.forceunicode(target)
    encoded = self.personality.encode(target or "", self.encoding)
    self.translation = encoded
    self.explicitely_missing = not target
def source(self, source):
    """Encode ``source`` with the personality and store it in ``self.value``."""
    self._rich_source = None
    unicode_source = data.forceunicode(source) or ""
    self.value = self.personality.encode(unicode_source, self.encoding)
def setcontext(self, context):
    """Store the PO-quoted form of ``context`` in ``self.msgctxt``."""
    unicode_context = data.forceunicode(context)
    self.msgctxt = quoteforpo(unicode_context)