def get_text(self, start=None, end=None, include_hidden_chars=True):
    """
    Return the buffer content as a StyledText.

    The plain text between ``start`` and ``end`` is read through the parent
    class, then the tags reported by ``_get_tag_from_range`` are converted
    to StyledTextTag objects. ``start`` and ``end`` default to the start
    and end iterators of the buffer.

    .. note:: ``s_`` prefix means StyledText*, while ``g_`` prefix means
              Gtk.*.
    """
    if start is None:
        start = self.get_start_iter()
    if end is None:
        end = self.get_end_iter()

    raw_text = super(StyledTextBuffer, self).get_text(start, end,
                                                      include_hidden_chars)
    txt = str(raw_text)

    # convert every tag found in the buffer
    s_tags = []
    for g_tagname, g_ranges in self._get_tag_from_range().items():
        if g_tagname.startswith('link'):
            # link tags carry their target in the ``data`` attribute
            g_tag = self.get_tag_table().lookup(g_tagname)
            s_ranges = [(first, last + 1) for (first, last) in g_ranges]
            s_value = g_tag.data
            s_tags.append(StyledTextTag(_('Link'), s_value, s_ranges))
            continue

        # any other tag name is "<style number>" or "<style number> <value>"
        name_parts = g_tagname.split(' ', 1)
        try:
            style = int(name_parts[0])
            s_value = (None if len(name_parts) == 1
                       else StyledTextTagType.STYLE_TYPE[style](name_parts[1]))

            if style in ALLOWED_STYLES:
                s_ranges = [(first, last + 1) for (first, last) in g_ranges]
                s_tags.append(StyledTextTag(style, s_value, s_ranges))
        except ValueError:
            _LOG.debug("silently skipping Gtk.TextTag '%s'" % g_tagname)

    return StyledText(txt, s_tags)
def __getitem__(self, key):
    """
    Return the part of this styled text selected by the slice ``key``.

    The string is sliced like a plain str. Every tag is clipped to the
    selected span and its ranges are shifted so that they are relative to
    the start of the returned text; tags left without any range are dropped.

    @param key: the span of text to extract; a step is not supported
    @type key: slice
    @return: a new object of the same class as self
    @raise IndexError: if the slice has a step
    @raise TypeError: if key is not a slice
    """
    string = self._string[key]

    if not isinstance(key, slice):
        # Indexing by a single position is not supported.
        raise TypeError("Invalid argument type.")

    if key.step:
        raise IndexError("Invalid step size")

    # Let slice.indices() resolve missing, negative and out-of-range bounds
    # exactly as str slicing does, so that the tag ranges are computed
    # against the same span that produced ``string``.
    key_start, key_stop, _step = key.indices(len(self._string))

    new_tags = []
    for tag in self._tags:
        new_tag = StyledTextTag(int(tag.name), tag.value)
        for (start_tag, end_tag) in tag.ranges:
            start = max(key_start, start_tag)
            end = min(key_stop, end_tag)
            if start < end:
                new_tag.ranges.append((start - key_start, end - key_start))
        if new_tag.ranges:
            new_tags.append(new_tag)

    return self.__class__(string, new_tags)
def _get_styled(name, callname, placeholder=False,
                trans_text=glocale.translation.sgettext, name_format=None):
    """
    Return a StyledText object with the name formatted according to the
    parameters:

    @param name: the name to format; it is copied, not modified
    @param callname: whether the callname should be used instead of the first
        name (CALLNAME_REPLACE), underlined within the first name
        (CALLNAME_UNDERLINE_ADD) or not used at all (CALLNAME_DONTUSE).
    @param placeholder: whether a series of underscores should be inserted as a
        placeholder if first name or surname are missing.
    @param trans_text: allow deferred translation of strings
    @type trans_text: a GrampsLocale sgettext instance
    trans_text is a defined keyword (see po/update_po.py, po/genpot.sh)
    @param name_format: optional format to control display of person's name
    @type name_format: None or int
    """
    # Make a copy of the name object so we don't mess around with the real
    # data.
    n = Name(source=name)

    # Insert placeholders.
    if placeholder:
        if not n.first_name:
            n.first_name = "____________"
        if not n.surname:
            n.surname = "____________"

    if n.call:
        if callname == CALLNAME_REPLACE:
            # Replace first name with call name.
            n.first_name = n.call
        elif callname == CALLNAME_UNDERLINE_ADD:
            if n.call not in n.first_name:
                # Add call name to first name.
                # translators: used in French+Russian, ignore otherwise
                n.first_name = trans_text('"%(callname)s" (%(firstname)s)') % {
                    'callname': n.call,
                    'firstname': n.first_name}

    # The displayer's default format is shared state: always put the previous
    # value back, even when formatting the name fails.
    real_format = name_displayer.get_default_format()
    try:
        if name_format is not None:
            name_displayer.set_default_format(name_format)
        text = name_displayer.display_name(n)
    finally:
        name_displayer.set_default_format(real_format)

    tags = []
    # "name" in next line is on purpose: only underline the call name
    # if it was a part of the *original* first name
    if (n.call and callname == CALLNAME_UNDERLINE_ADD
            and n.call in name.first_name):
        callpos = text.find(n.call)
        # The displayed text may not contain the call name at all; skip the
        # underline then rather than emitting a range built from -1.
        if callpos >= 0:
            tags.append(StyledTextTag(StyledTextTagType.UNDERLINE, True,
                                      [(callpos, callpos + len(n.call))]))

    return StyledText(text, tags)
def _get_styled(name, callname, placeholder=False, name_format=None):
    """
    Return a StyledText object with the name formatted according to the
    parameters:

    @param name: the name to format; it is copied, not modified
    @param callname: whether the callname should be used instead of the first
        name (CALLNAME_REPLACE), underlined within the first name
        (CALLNAME_UNDERLINE_ADD) or not used at all (CALLNAME_DONTUSE).
    @param placeholder: whether a series of underscores should be inserted as a
        placeholder if first name or surname are missing.
    @param name_format: optional format used for this call instead of the
        name displayer's default format; None keeps the default
    """
    # Make a copy of the name object so we don't mess around with the real
    # data.
    n = Name(source=name)

    # Insert placeholders.
    if placeholder:
        if not n.first_name:
            n.first_name = "____________"
        if not n.surname:
            n.surname = "____________"

    if n.call:
        if callname == CALLNAME_REPLACE:
            # Replace first name with call name.
            n.first_name = n.call
        elif callname == CALLNAME_UNDERLINE_ADD:
            if n.call not in n.first_name:
                # Add call name to first name.
                n.first_name = "\"%(call)s\" (%(first)s)" % {
                    'call': n.call,
                    'first': n.first_name}

    # The displayer's default format is shared state: always put the previous
    # value back, even when formatting the name fails.
    real_format = name_displayer.get_default_format()
    try:
        if name_format is not None:
            name_displayer.set_default_format(name_format)
        text = name_displayer.display_name(n)
    finally:
        name_displayer.set_default_format(real_format)

    tags = []
    # "name" in next line is on purpose: only underline the call name
    # if it was a part of the *original* first name
    if (n.call and callname == CALLNAME_UNDERLINE_ADD
            and n.call in name.first_name):
        callpos = text.find(n.call)
        # The displayed text may not contain the call name at all; skip the
        # underline then rather than emitting a range built from -1.
        if callpos >= 0:
            tags.append(StyledTextTag(StyledTextTagType.UNDERLINE, True,
                                      [(callpos, callpos + len(n.call))]))

    return StyledText(text, tags)
def create_note(self, place, data, trans):
    """
    Add data to the database as a note and attach that note to place.

    The whole text is tagged with the 'Monospace' font face and the note
    gets the custom type "Place titles".

    @param place: object receiving the handle of the new note
    @param data: the text of the note
    @param trans: transaction passed on to ``self.db.add_note``
    """
    note = Note()
    monospace = StyledTextTag(StyledTextTagType.FONTFACE, 'Monospace',
                              [(0, len(data))])
    note.set_styledtext(StyledText(data, [monospace]))

    custom_type = NoteType()
    custom_type.set((NoteType.CUSTOM, _("Place titles")))
    note.set_type(custom_type)

    place.add_note(self.db.add_note(note, trans))
def linkst(text, url):
    """ Build a StyledText in which all of text is a link to url """
    whole_text = (0, len(text))
    link_tag = StyledTextTag(StyledTextTagType.LINK, url, [whole_text])
    return StyledText(text, [link_tag])
def boldst(text):
    """ Build a StyledText in which all of text is bold """
    whole_text = (0, len(text))
    bold_tag = StyledTextTag(StyledTextTagType.BOLD, True, [whole_text])
    return StyledText(text, [bold_tag])
def convert_to_styled(self, data):
    """
    This scans incoming notes for possible html.  It converts a select few
    tags into StyledText and removes the rest of the tags.  Notes of this
    type occur in data from FTM and ancestry.com.  Result is a much
    cleaner note.

    Italic, bold and underline tags, table headers and links become style
    tags; paragraph-like tags become newlines; table cell breaks become
    ': '.  Tags that are not recognised are kept in the text, highlighted,
    and reported on standard output.

    @param data: a string of text possibly containing html
    @type data: str
    @return: the cleaned text with its style tags
    @rtype: StyledText
    """
    token_specification = [
        # Italics: must not be nested, any tag terminates
        ('ITALIC', r'<i>.*?(?=<)'),
        # bolds: must not be nested, any tag terminates
        ('BOLD', r'<b>.*?(?=<)'),
        # Underlines: must not be nested, any tag terminates
        ('UNDER', r'<u>.*?(?=<)'),
        # Table Header Begin (start Bold)
        ('TBLHDRB', r'<tr><th>'),
        # Table Header End (end Bold)
        ('TBLHDRE', r'</th></tr>'),
        # Table Header Cell (repl with ': ')
        # (an optional closing </th> of the previous cell is swallowed too)
        ('TBLHDRC', r'(</th>)?<th>'),
        # Table Cell break (repl with ': ')
        ('TBLCELL', r'</td><td>'),
        # Table
        ('TABLE', r'</?table.*?>'),
        # Href start to end
        ('HREF', r'<+a .*?href=["\' ]*(?P<HREFL>.*?)'
                 r'["\' ].*?>(?P<HREFT>.*?)</a>+'),
        # HTTP start to end (have to rstrip(' .:') for link)
        ('HTTP', r'https?:.*?(\s|$)'),
        # Paragraph end
        ('PARAEND', r'</p>|</li>|<tr>|<br>'),
        # Skip over these tags
        ('SKIP', r'<ul>|</ul>|<li>|<p>|</tr>|<td>|</td>|<th>|'
                 r'</a>|</i>|</b>|</u>'),
        # Unimplemented HTTP tags
        ('UNKNWN', r'<.*?>'),
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)

    prev = 0        # position in data just after the previous token
    chunkpos = 0    # length of the output text produced so far
    chunks = []     # pieces of the output text
    italics = []
    bolds = []
    unders = []
    links = []
    reds = []       # ranges of unrecognised tags, highlighted in the result
    bldpos = -1     # output position of the open table header, -1 if none
    # the three simple styles are handled identically
    inline_styles = {'ITALIC': italics, 'BOLD': bolds, 'UNDER': unders}
    cell_separator = ': '

    data = html.unescape(data)  # clean up escaped html "&lt;" etc.
    for mo in re.finditer(tok_regex, data, flags=(re.DOTALL | re.I)):
        kind = mo.lastgroup
        st_txt = mo.group(kind)
        in_start = mo.start()
        in_end = mo.end()
        if kind in ('SKIP', 'TABLE'):
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
        elif kind == 'PARAEND':
            chunks.append(data[prev:in_start] + '\n')
            chunkpos += (in_start - prev + 1)
        elif kind in inline_styles:
            # the match is the 3 character opening tag plus the styled text
            chunks.append(data[prev:in_start] + data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            inline_styles[kind].append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'HTTP':  # HTTP found
            oldpos = chunkpos + in_start - prev
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            # the link itself excludes trailing punctuation
            st_txt = st_txt.rstrip(' .:)')
            newpos = oldpos + len(st_txt)
            links.append((st_txt, oldpos, newpos))
        elif kind == 'HREF':  # HREF found
            st_txt = mo.group('HREFT')
            lk_txt = mo.group('HREFL')
            # fix up relative links emitted by ancestry.com
            if lk_txt.startswith(("/search/dbextra", "/handler/domain")):
                lk_txt = "http://search.ancestry.com" + lk_txt
            oldpos = chunkpos + in_start - prev
            # if tag (minus any trailing '.') is substring of link
            if st_txt[0:-1] in lk_txt:
                st_txt = lk_txt  # just use the link
            else:  # use link and tag
                st_txt = " " + lk_txt + " (" + st_txt + ")"
            newpos = oldpos + len(st_txt)
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            links.append((lk_txt, oldpos, newpos))
        elif kind in ('TBLCELL', 'TBLHDRC'):  # Table cell break
            chunks.append(data[prev:in_start] + cell_separator)
            # advance by exactly what was appended, otherwise every later
            # tag range is shifted
            chunkpos += (in_start - prev + len(cell_separator))
        elif kind == 'TBLHDRB':  # header start
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
            bldpos = chunkpos
        elif kind == 'TBLHDRE':  # Header end
            if bldpos == -1:
                # No header was opened: keep the tag in the text and
                # highlight it. The tag is appended even when no text
                # precedes it, so that chunkpos stays in step with chunks.
                chunks.append(data[prev:in_end])
                newpos = chunkpos - prev + in_end
                reds.append((chunkpos + in_start - prev, newpos))
                chunkpos = newpos
                print('Invalid table header, no start tag found')
            else:
                if prev != in_start:
                    chunks.append(data[prev:in_start])
                    chunkpos += (in_start - prev)
                bolds.append((bldpos, chunkpos))
                bldpos = -1
        elif kind == 'UNKNWN':
            # Keep the tag in the text and highlight it; appended
            # unconditionally so that chunkpos stays in step with chunks.
            chunks.append(data[prev:in_end])
            newpos = chunkpos - prev + in_end
            reds.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
            print('Unexpected or unimplemented HTML tag', st_txt)
        else:
            print("shouldn't get here")

        prev = in_end
    chunks.append(data[prev:])

    result = ''.join(chunks)
    tags = [StyledTextTag(StyledTextTagType.LINK, target, [(begin, end)])
            for (target, begin, end) in links]
    if italics:
        tags.append(StyledTextTag(StyledTextTagType.ITALIC, False, italics))
    if bolds:
        tags.append(StyledTextTag(StyledTextTagType.BOLD, False, bolds))
    if unders:
        tags.append(StyledTextTag(StyledTextTagType.UNDERLINE, False,
                                  unders))
    if reds:
        tags.append(StyledTextTag(StyledTextTagType.HIGHLIGHT, '#FFFF00',
                                  reds))
    return StyledText(result, tags)
def tag_merge(old_tags, tag_list):
    """
    Merge two lists of style tags into a single new list.

    Tags are grouped by style (``tag.name.value``). For each style the
    start and end positions of all ranges are walked in position order.
    When a range with a different value starts while another one is open,
    the open range is closed at that position and its value is remembered;
    it is resumed once the newer range ends. If values from both lists are
    waiting to be resumed, the one from tag_list is resumed first. Output
    ranges of the same style and value that touch each other are joined.

    The two input lists are only read; the merged tags are returned in a
    new list.

    @param old_tags: the tags with the lower priority
    @type old_tags: iterable of StyledTextTag
    @param tag_list: the tags with the higher priority
    @type tag_list: iterable of StyledTextTag
    @return: the merged tags
    @rtype: list of StyledTextTag
    @raise ValueError: if a merged range ends up without a start or an end
    """
    styles = {}     # key:name  value:list of quads
    outstyles = {}  # key:tuple(name, value), value:list(ranges)
    merged = []
    # The loop variable must not reuse the name of the output list: doing so
    # made the function append to, and return, the caller's tag_list.
    for (prior, tag_group) in enumerate((old_tags, tag_list)):
        for tag in tag_group:
            quads = styles.setdefault(tag.name.value, [])
            outstyles.setdefault((tag.name.value, tag.value), [])
            for rang in tag.ranges:
                # quad: Value, priority, Start or Stop, True if Stop
                quads.append((tag.value, prior, rang[0], False))
                quads.append((tag.value, prior, rang[1], True))

    for tagname, quads in styles.items():
        quads.sort(key=lambda quad: quad[2])  # sort by start/stop index
        # start, value, prior describe the currently open range
        start = value = prior = None
        # openst[0]: list of low priority open (nested) values
        # openst[1]: list of high priority open (nested) values
        openst = [[], []]
        for quad in quads:
            if not quad[3]:  # We have a start
                if start is None:  # we can start up
                    value = quad[0]
                    prior = quad[1]
                    start = quad[2]
                elif value == quad[0]:  # we have an overlap with same
                    continue
                else:  # we have a nest or overlap with different
                    openst[prior].append(value)  # save current in open
                    # close out current, and start new
                    outstyles[(tagname, value)].append((start, quad[2]))
                    value = quad[0]
                    prior = quad[1]
                    start = quad[2]
            else:  # we have an end
                if start is None:  # end with no start
                    continue
                if quad[0] == value:  # current finished
                    outstyles[(tagname, value)].append((start, quad[2]))
                    if openst[1]:  # high priority nested to restart
                        value = openst[1].pop()
                        prior = 1
                        start = quad[2]
                    elif openst[0]:  # low priority nested to restart
                        value = openst[0].pop()
                        prior = 0
                        start = quad[2]
                    else:  # no nest to restart, just close out
                        start = value = prior = None
                else:  # clear out overlap
                    try:
                        openst[quad[1]].remove(quad[0])
                    except ValueError:
                        pass
                    continue

    end = None
    msg = ("Bad Style range! Do not save, "
           "if you do your db will be corrupted.")
    for ((name, value), ranges) in outstyles.items():
        new_range = []
        start = None
        for rang in ranges:
            if start is not None:
                if rang[0] == end:
                    # should merge two ranges together
                    end = rang[1]
                    continue
                else:
                    new_range.append((start, end))
                    if start is None or end is None:
                        raise ValueError(msg)
            start = rang[0]
            end = rang[1]
        new_range.append((start, end))
        if start is None or end is None:
            raise ValueError(msg)
        merged.append(StyledTextTag(name, value, new_range))
    return merged
def convert_to_styled(self, data):
    """
    This scans incoming notes for possible html.  It converts a select few
    tags into StyledText and removes the rest of the tags.  Notes of this
    type occur in data from FTM and ancestry.com.  Result is a much
    cleaner note.

    The tokens are recognised with ``self.tok_regex``.  The new tags are
    combined with the tags the text already carries by ``tag_merge``.

    @param data: styled text possibly containing html; its ``_string``
        attribute is rewritten in place while character references are
        replaced
    @type data: StyledText
    @return: the cleaned text with its style tags
    @rtype: StyledText
    """
    # First pass: replace html character references ("&lt;" etc.), the way
    # html.unescape() does for a str, but piecewise on slices of the styled
    # text. Each replacement is written over the beginning of its reference
    # so that the match positions stay valid; the rest of the reference is
    # skipped by moving prev to the end of the match.
    prev = 0
    chunks = []
    for mo in re.finditer(html._charref, data._string):
        out = html._replace_charref(mo)
        in_start = mo.start()
        in_end = mo.end()
        out_end = in_start + len(out)
        data._string = (data._string[:in_start] + out +
                        data._string[out_end:])
        if prev != out_end:
            chunks.append(data[prev:out_end])
        prev = in_end
    chunks.append(data[prev:])
    data = StyledText().join(chunks)

    # Second pass: convert or remove the html tags.
    prev = 0        # position in data just after the previous token
    chunkpos = 0    # length of the output text produced so far
    chunks = []     # pieces of the output text
    italics = []
    bolds = []
    unders = []
    links = []
    reds = []       # ranges of unrecognised tags, highlighted in the result
    bldpos = -1     # output position of the open table header, -1 if none
    # the three simple styles are handled identically
    inline_styles = {'ITALIC': italics, 'BOLD': bolds, 'UNDER': unders}
    cell_separator = ': '

    for mo in re.finditer(self.tok_regex, data._string,
                          flags=(re.DOTALL | re.I)):
        kind = mo.lastgroup
        st_txt = mo.group(kind)
        in_start = mo.start()
        in_end = mo.end()
        if kind in ('SKIP', 'TABLE'):
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
        elif kind == 'PARAEND':
            chunks.append(data[prev:in_start] + '\n')
            chunkpos += (in_start - prev + 1)
        elif kind in inline_styles:
            # the first 3 characters of the match (the opening tag) are
            # left out of the output
            chunks.append(data[prev:in_start] + data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            inline_styles[kind].append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'HTTP':  # HTTP found
            st_txt = mo.group('HTTP')
            oldpos = chunkpos + in_start - prev
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            # the link itself excludes trailing punctuation
            st_txt = st_txt.rstrip(' .:)')
            newpos = oldpos + len(st_txt)
            links.append((st_txt, oldpos, newpos))
        elif kind == 'HREF':  # HREF found
            st_txt = mo.group('HREFT')
            lk_txt = mo.group('HREFL')
            # fix up relative links emitted by ancestry.com
            if lk_txt.startswith(("/search/dbextra", "/handler/domain")):
                lk_txt = "http://search.ancestry.com" + lk_txt
            oldpos = chunkpos + in_start - prev
            # if tag (minus any trailing '.') is substring of link
            if st_txt[0:-1] in lk_txt:
                st_txt = lk_txt  # just use the link
            else:  # use link and tag
                st_txt = " " + lk_txt + " (" + st_txt + ")"
            newpos = oldpos + len(st_txt)
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            links.append((lk_txt, oldpos, newpos))
        elif kind in ('TBLCELL', 'TBLHDRC'):  # Table cell break
            chunks.append(data[prev:in_start] + cell_separator)
            # advance by exactly what was appended, otherwise every later
            # tag range is shifted
            chunkpos += (in_start - prev + len(cell_separator))
        elif kind == 'TBLHDRB':  # header start
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
            bldpos = chunkpos
        elif kind == 'TBLHDRE':  # Header end
            if bldpos == -1:
                # No header was opened: keep the tag in the text and
                # highlight it. The tag is appended even when no text
                # precedes it, so that chunkpos stays in step with chunks.
                chunks.append(data[prev:in_end])
                newpos = chunkpos - prev + in_end
                reds.append((chunkpos + in_start - prev, newpos))
                chunkpos = newpos
                print('Invalid table header, no start tag found')
            else:
                if prev != in_start:
                    chunks.append(data[prev:in_start])
                    chunkpos += (in_start - prev)
                bolds.append((bldpos, chunkpos))
                bldpos = -1
        elif kind == 'UNKNWN':
            chunks.append(data[prev:in_end])
            newpos = chunkpos - prev + in_end
            reds.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
            print('Unexpected or unimplemented HTML tag', st_txt)
        else:
            print("shouldn't get here")

        prev = in_end
    chunks.append(data[prev:])

    result = StyledText().join(chunks)
    tags = [StyledTextTag(StyledTextTagType.LINK, target, [(begin, end)])
            for (target, begin, end) in links]
    if italics:
        tags.append(StyledTextTag(StyledTextTagType.ITALIC, False, italics))
    if bolds:
        tags.append(StyledTextTag(StyledTextTagType.BOLD, False, bolds))
    if unders:
        tags.append(StyledTextTag(StyledTextTagType.UNDERLINE, False,
                                  unders))
    if reds:
        tags.append(StyledTextTag(StyledTextTagType.HIGHLIGHT, '#FFFF00',
                                  reds))
    return StyledText(result._string, tag_merge(result._tags, tags))