Ejemplo n.º 1
0
    def soup2markup(self, soup: Any) -> List[Any]:
        """Translate parsed message HTML into a flat list of markup entries.

        Iterates over the direct children of ``soup``. Text nodes are copied
        through unchanged, recognised tags become ``(style, content)`` tuples
        or placeholder strings, and container tags are expanded by calling
        this method recursively on them.

        Args:
            soup: Iterable of parsed HTML nodes, or ``None``. Each node is
                either a ``NavigableString`` or an object exposing ``name``,
                ``attrs`` and ``text``.

        Returns:
            The markup list. Its first entry is always ``''`` so the result
            holds at least one string even when nothing else is emitted.
        """
        # Ensure a string is provided, in case the soup finds none
        # This could occur if eg. an image is removed or not shown
        markup = ['']  # type: List[Union[str, Tuple[Optional[str], Any]]]
        if soup is None:  # This is not iterable, so return promptly
            return markup
        unrendered_tags = {  # In pairs of 'tag_name': 'text'
            # TODO: Some of these could be implemented
            'br': '',  # No indicator of absence
            'hr': 'RULER',
            'img': 'IMAGE',
        }
        unrendered_div_classes = {  # In pairs of 'div_class': 'text'
            # TODO: Support embedded content & twitter preview?
            'message_embed': 'EMBEDDED CONTENT',
            'inline-preview-twitter': 'TWITTER PREVIEW',
            'message_inline_ref': '',  # Duplicate of other content
            'message_inline_image': '',  # Duplicate of other content
        }
        unrendered_template = '[{} NOT RENDERED]'
        for element in soup:
            if isinstance(element, NavigableString):
                # NORMAL STRINGS
                # While `bq_len` is positive, each bare newline is dropped
                # and counted off against it instead of being emitted.
                if (hasattr(self, 'bq_len') and element == '\n'
                        and self.bq_len > 0):
                    self.bq_len -= 1
                    continue
                markup.append(element)
                continue

            tag = element.name
            # An element without attributes simply has no classes.
            classes = element.attrs.get('class', []) if element.attrs else []

            # Pick the first of the element's own classes that is listed as
            # unrendered, so the choice does not depend on set ordering when
            # more than one class matches.
            unrendered_class = None
            if tag == 'div':
                unrendered_class = next(
                    (cls for cls in classes if cls in unrendered_div_classes),
                    None)

            if unrendered_class is not None:
                # UNRENDERED DIV CLASSES
                text = unrendered_div_classes[unrendered_class]
                if text:
                    markup.append(unrendered_template.format(text))
            elif tag == 'img' and classes == ['emoji']:
                # CUSTOM EMOJIS AND ZULIP_EXTRA_EMOJI
                # Default to an empty name: a missing title must not break
                # the string concatenation below.
                emoji_name = element.attrs.get('title', '')
                markup.append(('msg_emoji', ':' + emoji_name + ':'))
            elif tag in unrendered_tags:
                # UNRENDERED SIMPLE TAGS
                text = unrendered_tags[tag]
                if text:
                    markup.append(unrendered_template.format(text))
            elif tag in ('p', 'ul', 'del'):
                # PARAGRAPH, LISTS, STRIKE-THROUGH
                markup.extend(self.soup2markup(element))
            elif tag == 'span' and 'emoji' in classes:
                # EMOJI
                markup.append(('msg_emoji', element.text))
            elif tag == 'span' and ('katex-display' in classes
                                    or 'katex' in classes):
                # MATH TEXT
                markup.append(element.text)
            elif tag == 'span' and ('user-mention' in classes
                                    or 'user-group-mention' in classes):
                # USER MENTIONS & USER-GROUP MENTIONS
                markup.append(('msg_mention', element.text))
            elif tag == 'a' and 'href' in element.attrs:
                # LINKS
                # (An anchor without href has nothing to link to; it falls
                # through to the generic branch and only its content shows.)
                link = element.attrs['href']
                text = element.img['src'] if element.img else element.text

                parsed_link = urlparse(link)
                if not parsed_link.scheme:  # => relative link
                    # Prepend org url to convert it to an absolute link
                    link = urljoin(self.model.server_url, link)

                if link == text:
                    # If the link and text are same
                    # usually the case when user just pastes
                    # a link then just display the link
                    markup.append(('msg_link', text))
                else:
                    markup.append(
                        ('msg_link', '[' + text + ']' + '(' + link + ')'))
            elif tag == 'blockquote':
                # BLOCKQUOTE TEXT
                markup.append(('msg_quote', self.soup2markup(element)))
            elif tag == 'code':
                # CODE (INLINE?)
                markup.append(('msg_code', element.text))
            elif tag == 'div' and 'codehilite' in classes:
                # CODE (BLOCK?)
                markup.append(('msg_code', element.text))
            elif tag in ('strong', 'em'):
                # BOLD & ITALIC (both use the same style)
                markup.append(('msg_bold', element.text))
            elif tag == 'li':
                # LISTS
                # TODO: Support nested lists
                markup.append('  * ')
                markup.extend(self.soup2markup(element))
            elif tag == 'table':
                markup.extend(render_table(element))
            else:
                # Any other tag: render whatever it contains.
                markup.extend(self.soup2markup(element))
        return markup
Ejemplo n.º 2
0
    def soup2markup(self, soup: Any, **state: Any) -> List[Any]:
        """Translate parsed message HTML into a flat list of markup entries.

        Iterates over the direct children of ``soup``. Text nodes are copied
        through unchanged, recognised tags become ``(style, content)`` tuples
        or placeholder strings, and container tags are expanded by calling
        this method recursively on them. Links are additionally recorded in
        ``self.message_links``, keyed by URL, as
        ``(text, index, show_footlink)`` tuples.

        Args:
            soup: Iterable of parsed HTML nodes, or ``None``. Each node is
                either a ``NavigableString`` or an object exposing ``name``,
                ``attrs`` and ``text``.
            **state: List-rendering state handed down through recursion.
                Keys used here are ``indent_level`` (nesting depth),
                ``list_start`` (true until the first item of the outermost
                list has been emitted) and ``list_index`` (next number of an
                ordered list; absent for unordered lists).

        Returns:
            The markup list. Its first entry is always ``''`` so the result
            holds at least one string even when nothing else is emitted.
        """
        # Ensure a string is provided, in case the soup finds none
        # This could occur if eg. an image is removed or not shown
        markup = ['']  # type: List[Union[str, Tuple[Optional[str], Any]]]
        if soup is None:  # This is not iterable, so return promptly
            return markup
        unrendered_tags = {  # In pairs of 'tag_name': 'text'
            # TODO: Some of these could be implemented
            'br': '',  # No indicator of absence
            'hr': 'RULER',
            'img': 'IMAGE',
        }
        unrendered_div_classes = {  # In pairs of 'div_class': 'text'
            # TODO: Support embedded content & twitter preview?
            'message_embed': 'EMBEDDED CONTENT',
            'inline-preview-twitter': 'TWITTER PREVIEW',
            'message_inline_ref': '',  # Duplicate of other content
            'message_inline_image': '',  # Duplicate of other content
        }
        unrendered_template = '[{} NOT RENDERED]'
        for element in soup:
            if isinstance(element, NavigableString):
                # NORMAL STRINGS
                # While `bq_len` is positive, each bare newline is dropped
                # and counted off against it instead of being emitted.
                if (hasattr(self, 'bq_len') and element == '\n'
                        and self.bq_len > 0):
                    self.bq_len -= 1
                    continue
                markup.append(element)
                continue

            tag = element.name
            # An element without attributes simply has no classes.
            classes = element.attrs.get('class', []) if element.attrs else []

            # Pick the first of the element's own classes that is listed as
            # unrendered, so the choice does not depend on set ordering when
            # more than one class matches.
            unrendered_class = None
            if tag == 'div':
                unrendered_class = next(
                    (cls for cls in classes if cls in unrendered_div_classes),
                    None)

            if unrendered_class is not None:
                # UNRENDERED DIV CLASSES
                text = unrendered_div_classes[unrendered_class]
                if text:
                    markup.append(unrendered_template.format(text))
            elif tag == 'img' and classes == ['emoji']:
                # CUSTOM EMOJIS AND ZULIP_EXTRA_EMOJI
                # Default to an empty name: a missing title must not break
                # the string concatenation below.
                emoji_name = element.attrs.get('title', '')
                markup.append(('msg_emoji', ':' + emoji_name + ':'))
            elif tag in unrendered_tags:
                # UNRENDERED SIMPLE TAGS
                text = unrendered_tags[tag]
                if text:
                    markup.append(unrendered_template.format(text))
            elif tag in ('p', 'del'):
                # PARAGRAPH, STRIKE-THROUGH
                markup.extend(self.soup2markup(element))
            elif tag == 'span' and 'emoji' in classes:
                # EMOJI
                markup.append(('msg_emoji', element.text))
            elif tag == 'span' and ('katex-display' in classes
                                    or 'katex' in classes):
                # MATH TEXT
                markup.append(element.text)
            elif tag == 'span' and ('user-group-mention' in classes
                                    or 'user-mention' in classes):
                # USER MENTIONS & USER-GROUP MENTIONS
                markup.append(('msg_mention', element.text))
            elif tag == 'a' and 'href' in element.attrs:
                # LINKS
                # (An anchor without href has nothing to link to; it falls
                # through to the generic branch and only its content shows.)
                # Use rstrip to avoid anomalies and edge cases like
                # https://google.com vs https://google.com/.
                link = element.attrs['href'].rstrip('/')
                text = element.img['src'] if element.img else element.text
                text = text.rstrip('/')

                parsed_link = urlparse(link)
                if not parsed_link.scheme:  # => relative link
                    # Prepend org url to convert it to an absolute link
                    link = urljoin(self.model.server_url, link)

                text = text if text else link

                show_footlink = True
                # Only use the last segment if the text is redundant.
                # NOTE: The 'without scheme' excerpt is to deal with the case
                # where a user puts a link without any scheme and the server
                # uses http as the default scheme but keeps the text as-is.
                # For instance, see how example.com/some/path becomes
                # <a href="http://example.com">example.com/some/path</a>.
                link_without_scheme, text_without_scheme = [
                    data.split('://')[1] if '://' in data else data
                    for data in [link, text]
                ]  # Split on '://' is for cases where text == link.
                if link_without_scheme == text_without_scheme:
                    segment = text.split('/')[-1]
                    # Replace text with its last segment if the segment has
                    # something significant than simply the 'domain name'.
                    if segment != text_without_scheme:
                        text = segment
                    else:
                        # Do not show as a footlink as the text is sufficient
                        # to represent the link.
                        show_footlink = False

                # Detect duplicate links to save screen real estate.
                if link not in self.message_links:
                    self.message_links[link] = (text,
                                                len(self.message_links) + 1,
                                                show_footlink)
                else:
                    # Append the text if its link already exist with a
                    # different text.
                    saved_text, saved_link_index, saved_footlink_status = (
                        self.message_links[link])
                    if saved_text != text:
                        self.message_links[link] = (
                            '{}, {}'.format(saved_text, text),
                            saved_link_index,
                            show_footlink or saved_footlink_status,
                        )

                markup.extend([
                    ('msg_link', text),
                    ' ',
                    ('msg_link_index',
                     '[{}]'.format(self.message_links[link][1])),
                ])
            elif tag == 'blockquote':
                # BLOCKQUOTE TEXT
                markup.append(('msg_quote', self.soup2markup(element)))
            elif tag == 'code':
                # CODE (INLINE?)
                markup.append(('msg_code', element.text))
            elif tag == 'div' and 'codehilite' in classes:
                # CODE (BLOCK?)
                markup.append(('msg_code', element.text))
            elif tag in ('strong', 'em'):
                # BOLD & ITALIC (both use the same style)
                markup.append(('msg_bold', element.text))
            elif tag in ('ul', 'ol'):
                # LISTS (UL & OL)
                # Blank out bare newlines between items so they do not end
                # up in the output as text.
                for part in element.contents:
                    if part == '\n':
                        part.replace_with('')

                # `state` is this call's own dict (it arrives as keyword
                # arguments), so changes here reach nested calls only.
                if 'indent_level' not in state:
                    state['indent_level'] = 1
                    state['list_start'] = True
                else:
                    state['indent_level'] += 1
                    state['list_start'] = False
                if tag == 'ol':
                    start_number = int(element.attrs.get('start', 1))
                    state['list_index'] = start_number
                    markup.extend(self.soup2markup(element, **state))
                    del state['list_index']  # reset at end of this list
                else:
                    if 'list_index' in state:
                        del state['list_index']  # this is unordered
                    markup.extend(self.soup2markup(element, **state))
                # NOTE(review): this removes the level entirely rather than
                # restoring the previous one, so a second list nested in the
                # same parent starts again from level 1 - confirm intended.
                del state['indent_level']  # reset indents after any list
            elif tag == 'li':
                # LIST ITEMS (LI)
                for part in element.contents:
                    if part == '\n':
                        part.replace_with('')
                # Every item except the very first starts on a new line.
                if not state.get('list_start', False):
                    markup.append('\n')

                indent = state.get('indent_level', 1)
                if 'list_index' in state:
                    markup.append('{}{}. '.format('  ' * indent,
                                                  state['list_index']))
                    state['list_index'] += 1
                else:
                    # Bullet style cycles with the nesting depth.
                    chars = [
                        '\N{BULLET}',
                        '\N{RING OPERATOR}',  # small hollow
                        '\N{HYPHEN}',
                    ]
                    markup.append('{}{} '.format(
                        '  ' * indent, chars[(indent - 1) % len(chars)]))
                state['list_start'] = False
                markup.extend(self.soup2markup(element, **state))
            elif tag == 'table':
                markup.extend(render_table(element))
            else:
                # Any other tag: render whatever it contains.
                markup.extend(self.soup2markup(element))
        return markup