Python HtmlParser Exemples, verto.utils.HtmlParser.HtmlParser Python Exemples

Exemple #1

0

Afficher le fichier

    def test_example_data_and_subelements(self):
        '''Checks that data and subelements work together.

        Parses the fixture, checks the html > body > (h1, p) >
        (em, b, a) structure, then checks that serialising the tree
        reproduces the input text exactly.
        '''
        input_text = self.read_test_file('example_data_and_subelements.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual('html', root.tag)

        elements = list(root)
        self.assertEqual(1, len(elements))
        self.assertEqual('body', elements[0].tag)

        elements = list(elements[0])  # Open Body
        self.assertEqual(2, len(elements))
        self.assertEqual('h1', elements[0].tag)
        self.assertEqual('p', elements[1].tag)

        elements = list(elements[1])  # Open p
        self.assertEqual(3, len(elements))
        self.assertEqual('em', elements[0].tag)
        self.assertEqual('b', elements[1].tag)
        self.assertEqual('a', elements[2].tag)

        # Round trip: the serialised tree must equal the original text.
        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #2

0

Afficher le fichier

 def test_example_access_root_before_feed_error(self):
     '''Asking a parser for its root element before anything has been
     fed to it must raise an AttributeError.
     '''
     unfed_parser = HtmlParser()
     with self.assertRaises(AttributeError):
         unfed_parser.get_root()

Exemple #3

0

Afficher le fichier

Fichier : GenericTagBlockProcessor.py Projet : uccser/kordac

    def run(self, parent, blocks):
        ''' Generic run method for single match tags.

        Takes the first block, passes any text before the tag to the
        block parser, puts any text after the tag back on the block
        list, then renders the template and appends the parsed result
        to the parent.

        Args:
            parent: The parent node of the element tree that children
                will reside in.
            blocks: A list of strings of the document, where the
                first block tests true.
        '''
        current = blocks.pop(0)
        tag = self.pattern.search(current)

        leading = current[:tag.start()]
        trailing = current[tag.end():]
        if leading.strip():
            self.parser.parseChunk(parent, leading)
        if trailing.strip():
            blocks.insert(0, trailing)

        values = parse_arguments(self.processor, tag.group('args'), self.arguments)
        # Custom parsing may add to or override the parsed arguments.
        values.update(self.custom_parsing(values))

        context = self.process_parameters(self.processor, self.template_parameters, values)

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        parent.append(html_parser.get_root())

Exemple #4

0

Afficher le fichier

    def test_example_basic_usage(self):
        '''Checks that the expected usecase works.

        Parses the fixture, checks the html > body > (h1, p, div) >
        (img, a) structure and the img / a attributes, then checks that
        serialising the tree reproduces the input text exactly.
        '''
        input_text = self.read_test_file('example_basic_usage.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual('html', root.tag)

        elements = list(root)
        self.assertEqual(1, len(elements))
        self.assertEqual('body', elements[0].tag)

        elements = list(elements[0])  # Open Body
        self.assertEqual(3, len(elements))
        self.assertEqual('h1', elements[0].tag)
        self.assertEqual('p', elements[1].tag)
        self.assertEqual('div', elements[2].tag)

        elements = list(elements[2])  # Open Div
        self.assertEqual(2, len(elements))
        self.assertEqual('img', elements[0].tag)
        self.assertEqual('a', elements[1].tag)

        img = elements[0]
        self.assertEqual('Example text.', img.get('alt'))
        self.assertEqual('example.com/example.jpg', img.get('src'))

        a = elements[1]
        self.assertEqual('https://www.example.com', a.get('href'))

        # Round trip: the serialised tree must equal the original text.
        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #5

0

Afficher le fichier

Fichier : GenericTagBlockProcessor.py Projet : eduardomourar/verto

    def run(self, parent, blocks):
        ''' Generic run method for single match tags.

        Takes the first block, passes any text before the tag to the
        block parser, puts any text after the tag back on the block
        list, then renders the template and appends the parsed result
        to the parent.

        Args:
            parent: The parent node of the element tree that children
                will reside in.
            blocks: A list of strings of the document, where the
                first block tests true.
        '''
        current = blocks.pop(0)
        tag = self.pattern.search(current)

        leading = current[:tag.start()]
        trailing = current[tag.end():]
        if leading.strip():
            self.parser.parseChunk(parent, leading)
        if trailing.strip():
            blocks.insert(0, trailing)

        values = parse_arguments(self.processor, tag.group('args'),
                                 self.arguments)
        # Custom parsing may add to or override the parsed arguments.
        values.update(self.custom_parsing(values))

        context = self.process_parameters(self.processor,
                                          self.template_parameters,
                                          values)

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        parent.append(html_parser.get_root())

Exemple #6

0

Afficher le fichier

Fichier : ScratchTreeprocessor.py Projet : eduardomourar/verto

    def run(self, root):
        ''' Walks the html tree, passing every pre element to
        process_html, and (when fenced compatibility is enabled) does
        the same for each parseable raw html block in the stash.

        Args:
            root: The root of the document element tree.
        '''
        # Snapshot the matches first: a modified tree will leave the
        # iterator undefined.
        for pre_node in list(root.iterfind('.//pre')):
            self.process_html(pre_node)

        if not self.fenced_compatibility:
            return

        stash = self.markdown.htmlStash
        for index in range(stash.html_counter):
            raw_html, safe = stash.rawHtmlBlocks[index]
            try:
                parsed = HtmlParser().feed(raw_html).close().get_root()
            except etree.ParseError:
                # Unparseable stash entries are left untouched.
                continue

            if parsed is None:
                continue
            self.process_html(parsed)
            stash.rawHtmlBlocks[index] = (
                HtmlSerializer.tostring(parsed), safe)

Exemple #7

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

 def test_example_lone_end_tag_error(self):
     '''Feeding and closing the lone end tag fixture must raise
     HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_lone_end_tag_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #8

0

Afficher le fichier

Fichier : GlossaryLinkPattern.py Projet : eduardomourar/verto

    def handleMatch(self, match):
        '''
        Builds a glossary-link element from a match. When reference
        text is given, a unique identifier is generated and recorded,
        with the reference text, against the term.
        Args:
            match: The string of text where the match was found.
        Returns:
            An element tree node to be appended to the html tree.
        '''
        link_text = match.group('text')
        values = parse_arguments(self.processor, match.group('args'),
                                 self.arguments)

        term = values['term']
        reference = values.get('reference-text')
        context = {'term': term, 'text': link_text}

        # Looked up even when there is no reference text.
        references = self.ext_glossary_terms[term]
        if reference is not None:
            slug = self.unique_slugify('glossary-' + term)
            references.append((reference, slug))
            context['id'] = slug

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        return html_parser.get_root()

Exemple #9

0

Afficher le fichier

Fichier : ScratchTreeprocessor.py Projet : uccser/kordac

    def run(self, root):
        ''' Walks the html tree, passing every pre element to
        process_html, and (when fenced compatibility is enabled) does
        the same for each parseable raw html block in the stash.

        Args:
            root: The root of the document element tree.
        '''
        # Snapshot the matches first: a modified tree will leave the iterator undefined.
        for pre_node in list(root.iterfind('.//pre')):
            self.process_html(pre_node)

        if not self.fenced_compatibility:
            return

        stash = self.markdown.htmlStash
        for index in range(stash.html_counter):
            raw_html, safe = stash.rawHtmlBlocks[index]
            try:
                parsed = HtmlParser().feed(raw_html).close().get_root()
            except etree.ParseError:
                # Unparseable stash entries are left untouched.
                continue

            if parsed is None:
                continue
            self.process_html(parsed)
            stash.rawHtmlBlocks[index] = HtmlSerializer.tostring(parsed), safe

Exemple #10

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

    def test_example_basic_usage(self):
        '''Checks that the expected usecase works.

        Parses the fixture, checks the html > body > (h1, p, div) >
        (img, a) structure and the img / a attributes, then checks that
        serialising the tree reproduces the input text exactly.
        '''
        input_text = self.read_test_file('example_basic_usage.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual('html', root.tag)

        elements = list(root)
        self.assertEqual(1, len(elements))
        self.assertEqual('body', elements[0].tag)

        elements = list(elements[0])  # Open Body
        self.assertEqual(3, len(elements))
        self.assertEqual('h1', elements[0].tag)
        self.assertEqual('p', elements[1].tag)
        self.assertEqual('div', elements[2].tag)

        elements = list(elements[2])  # Open Div
        self.assertEqual(2, len(elements))
        self.assertEqual('img', elements[0].tag)
        self.assertEqual('a', elements[1].tag)

        img = elements[0]
        self.assertEqual('Example text.', img.get('alt'))
        self.assertEqual('example.com/example.jpg', img.get('src'))

        a = elements[1]
        self.assertEqual('https://www.example.com', a.get('href'))

        # Round trip: the serialised tree must equal the original text.
        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #11

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

    def test_example_data_and_subelements(self):
        '''Checks that data and subelements work together.

        Parses the fixture, checks the html > body > (h1, p) >
        (em, b, a) structure, then checks that serialising the tree
        reproduces the input text exactly.
        '''
        input_text = self.read_test_file('example_data_and_subelements.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual('html', root.tag)

        elements = list(root)
        self.assertEqual(1, len(elements))
        self.assertEqual('body', elements[0].tag)

        elements = list(elements[0])  # Open Body
        self.assertEqual(2, len(elements))
        self.assertEqual('h1', elements[0].tag)
        self.assertEqual('p', elements[1].tag)

        elements = list(elements[1])  # Open p
        self.assertEqual(3, len(elements))
        self.assertEqual('em', elements[0].tag)
        self.assertEqual('b', elements[1].tag)
        self.assertEqual('a', elements[2].tag)

        # Round trip: the serialised tree must equal the original text.
        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #12

0

Afficher le fichier

    def handleMatch(self, match):
        ''' Inherited from Pattern. Accepts a match and returns the
        ElementTree element rendered from the template for the given
        file path and optional arguments.

        Args:
            match: The string of text where the match was found.
        Returns:
            An element tree node to be appended to the html tree.
        '''
        values = parse_arguments(self.processor, match.group('args'),
                                 self.arguments)

        context = dict()
        file_path = values['file-path']
        # A path that does not start with 'http' is an internal image.
        is_internal = re.search(r'^http', file_path) is None
        if is_internal:
            self.required.add(file_path)
            context.update(image_file_name_components(file_path))
        context['full_file_path'] = file_path
        context['file_relative'] = is_internal

        # Optional arguments: context key -> tag argument name.
        optional_arguments = (
            ('alt', 'alt'),
            ('caption', 'caption'),
            ('caption_link', 'caption-link'),
            ('source_link', 'source'),
            ('hover_text', 'hover-text'),
        )
        for context_key, argument_name in optional_arguments:
            context[context_key] = values.get(argument_name, None)

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        return html_parser.get_root()

Exemple #13

0

Afficher le fichier

Fichier : GlossaryLinkPattern.py Projet : uccser/kordac

    def handleMatch(self, match):
        '''
        Builds a glossary-link element from a match. When reference
        text is given, a unique identifier is generated and recorded,
        with the reference text, against the term.
        Args:
            match: The string of text where the match was found.
        Returns:
            An element tree node to be appended to the html tree.
        '''
        link_text = match.group('text')
        values = parse_arguments(self.processor, match.group('args'), self.arguments)

        term = values['term']
        reference = values.get('reference-text')
        context = {'term': term, 'text': link_text}

        # Looked up even when there is no reference text.
        references = self.ext_glossary_terms[term]
        if reference is not None:
            slug = self.unique_slugify('glossary-' + term)
            references.append((reference, slug))
            context['id'] = slug

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        return html_parser.get_root()

Exemple #14

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

 def test_example_access_root_before_feed_error(self):
     '''Asking a parser for its root element before anything has been
     fed to it must raise an AttributeError.
     '''
     unfed_parser = HtmlParser()
     with self.assertRaises(AttributeError):
         unfed_parser.get_root()

Exemple #15

0

Afficher le fichier

 def test_example_lone_end_tag_error(self):
     '''Feeding and closing the lone end tag fixture must raise
     HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_lone_end_tag_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #16

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

 def test_example_data_without_tags_error(self):
     '''Feeding and closing the fixture holding data with no root tag
     must raise HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_data_without_tags_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #17

0

Afficher le fichier

 def test_example_missing_end_tag_error(self):
     '''Feeding and closing the fixture with an element that is never
     closed must raise HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_missing_end_tag_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #18

0

Afficher le fichier

 def test_example_multiple_roots_error(self):
     '''Feeding and closing the fixture with more than one root
     element must raise HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_multiple_roots_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #19

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

 def test_example_multiple_roots_error(self):
     '''Feeding and closing the fixture with more than one root
     element must raise HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_multiple_roots_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #20

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

 def test_example_missing_end_tag_error(self):
     '''Feeding and closing the fixture with an element that is never
     closed must raise HtmlParseError.
     '''
     fixture_html = self.read_test_file('example_missing_end_tag_error.html')
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #21

0

Afficher le fichier

 def test_example_data_without_tags_error(self):
     '''Feeding and closing the fixture holding data with no root tag
     must raise HtmlParseError.
     '''
     fixture_name = 'example_data_without_tags_error.html'
     fixture_html = self.read_test_file(fixture_name)
     html_parser = HtmlParser()
     with self.assertRaises(HtmlParseError):
         html_parser.feed(fixture_html).close()

Exemple #22

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

    def test_example_comment_ie(self):
        '''Checks that ie comments are added unchanged.

        The parsed root must be a comment node, and serialising it must
        reproduce the input text exactly.
        '''
        input_text = self.read_test_file('example_comment_ie.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()

        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual(etree.Comment, root.tag)

        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #23

0

Afficher le fichier

    def test_example_comment(self):
        '''Checks that comments are added unchanged.

        The parsed root must be a comment node, and serialising it must
        reproduce the input text exactly.
        '''
        input_text = self.read_test_file('example_comment.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()

        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual(etree.Comment, root.tag)

        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #24

0

Afficher le fichier

    def test_example_simple_void_tag(self):
        '''Checks that a simple (unclosed) void tag is created without
        error.

        The parsed root must be the img element with its alt and src
        attributes, and serialising it must reproduce the input text
        exactly.
        '''
        input_text = self.read_test_file('example_simple_void_tag.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()

        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual('img', root.tag)
        self.assertEqual('Example text.', root.get('alt'))
        self.assertEqual('example.com/example.jpg', root.get('src'))

        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #25

0

Afficher le fichier

Fichier : HtmlParserTest.py Projet : uccser/kordac

    def test_example_simple_void_tag(self):
        '''Checks that a simple (unclosed) void tag is created without
        error.

        The parsed root must be the img element with its alt and src
        attributes, and serialising it must reproduce the input text
        exactly.
        '''
        input_text = self.read_test_file('example_simple_void_tag.html')
        parser = HtmlParser()
        parser.feed(input_text).close()
        root = parser.get_root()

        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in unittest from Python 3.12.
        self.assertEqual('img', root.tag)
        self.assertEqual('Example text.', root.get('alt'))
        self.assertEqual('example.com/example.jpg', root.get('src'))

        root_string = HtmlSerializer.tostring(root)
        self.assertEqual(input_text, root_string)

Exemple #26

0

Afficher le fichier

Fichier : VideoBlockProcessor.py Projet : eduardomourar/verto

    def run(self, parent, blocks):
        '''Replaces a video tag {video url="example"} with the rendered
        video template. Inherited from BlockProcessor class.

        Args:
            parent: Element which this block is in.
            blocks: A list of strings of markdown text; the first one
                is consumed and converted.
        Raises:
            UnsupportedVideoPlayerError: When no video type can be
                extracted from the url.
            NoVideoIdentifierError: When no video identifier can be
                extracted from the url.
        '''
        current = blocks.pop(0)
        tag = self.pattern.search(current)

        leading = current[:tag.start()]
        trailing = current[tag.end():]
        if leading.strip():
            self.parser.parseChunk(parent, leading)
        if trailing.strip():
            blocks.insert(0, trailing)

        values = parse_arguments(self.processor, tag.group('args'), self.arguments)
        url = values['url']

        video_type, identifier = self.extract_video_identifier(url)
        if not video_type:
            raise UnsupportedVideoPlayerError(current, url, 'unsupported video player')
        if not identifier:
            raise NoVideoIdentifierError(current, url, 'missing video identifier')

        context = self.process_parameters(self.processor, self.template_parameters, values)
        context['identifier'] = identifier
        # Stays empty unless one of the player templates below applies.
        context['video_url'] = ''

        if url and video_type:
            if video_type == 'youtube':
                context['video_url'] = self.youtube_template.render(context)
            elif video_type == 'vimeo':
                context['video_url'] = self.vimeo_template.render(context)

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        parent.append(html_parser.get_root())

Exemple #27

0

Afficher le fichier

Fichier : RelativeLinkPattern.py Projet : uccser/kordac

    def handleMatch(self, match):
        ''' Inherited from Pattern. Renders the link template from the
        escaped link url, the optional link query and the link text of
        the match.
        Args:
            match: The string of text where the match was found.
        Returns:
            An element tree node to be appended to the html tree.
        '''
        context = {'link_path': escape(match.group('link_url'))}
        query = match.group('link_query')
        if query:
            # Only present in the context when the link has a query.
            context['link_query'] = query
        context['text'] = match.group('link_text')

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        return html_parser.get_root()

Exemple #28

0

Afficher le fichier

    def handleMatch(self, match):
        ''' Inherited from Pattern. Renders the link template from the
        escaped link url, the optional link query and the link text of
        the match.
        Args:
            match: The string of text where the match was found.
        Returns:
            An element tree node to be appended to the html tree.
        '''
        context = {'link_path': escape(match.group('link_url'))}
        query = match.group('link_query')
        if query:
            # Only present in the context when the link has a query.
            context['link_query'] = query
        context['text'] = match.group('link_text')

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        return html_parser.get_root()

Exemple #29

0

Afficher le fichier

Fichier : ScratchTreeprocessor.py Projet : eduardomourar/verto

    def process_html(self, node):
        ''' Replaces the single code child of the given node with the
        rendered html template when that child is scratch code.

        The language is read from the code element's class attribute,
        falling back to the code text itself when there is no class.

        Args:
            node: The possible pre node of a code block.
        '''
        children = list(node)
        if len(children) != 1 or children[0].tag != 'code':
            return

        code = children[0]
        content = code.text.strip()
        language = code.attrib.get('class', content)
        language_in_content = 'class' not in code.attrib

        match = self.pattern.search(language)
        if match is None:
            return

        options = [opt for opt in match.group('options').split(':') if opt]
        if language_in_content:
            # Drop the language line that was matched inside the text.
            content = content[match.end():]

        content_blocks = [part for part in content.split('\n\n') if part]
        if 'random' in options:
            shuffle(content_blocks)
        if 'split' not in options:
            # Without 'split' everything is one block again.
            content_blocks = [
                reduce(lambda joined, part: joined + '\n\n' + part,
                       content_blocks)
            ]

        images = []
        for block in content_blocks:
            content_hash = ScratchTreeprocessor.hash_content(block)
            self.update_required_images(content_hash, block)
            images.append(content_hash)

        rendered = self.template.render({'images': images})
        new_node = HtmlParser().feed(rendered).close().get_root()

        node.tag = 'remove'
        node.text = ''
        node.append(new_node)
        node.remove(code)

Exemple #30

0

Afficher le fichier

Fichier : HeadingBlockProcessor.py Projet : eduardomourar/verto

    def run(self, parent, blocks):
        ''' Renders the heading matched in the first block, appends it
        to the html tree and adds it to the verto heading tree.

        Args:
            parent: The parent node of the element tree that children
            will reside in.
            blocks: A list of strings of the document, where the
            first block tests true.
        '''
        current = blocks.pop(0)
        heading_match = self.pattern.search(current)

        leading = current[:heading_match.start()]
        trailing = current[heading_match.end():]
        if leading:
            self.parser.parseBlocks(parent, [leading])
        if trailing:
            blocks.insert(0, trailing)

        # The heading level is the length of the matched 'level' group.
        level = len(heading_match.group('level'))
        title = heading_match.group('header').strip()
        title_slug = self.custom_slugify(title)
        level_trail = self.level_generator.next(level)

        context = {
            'heading_level': level,
            'heading_type': 'h{0}'.format(level),
            'title': title,
            'title_slug': title_slug,
        }
        for position, level_value in enumerate(level_trail, start=1):
            context['level_{0}'.format(position)] = level_value

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        parent.append(html_parser.get_root())

        self.add_to_heading_tree(title, title_slug, level)

Exemple #31

0

Afficher le fichier

Fichier : ScratchInlineTreeprocessor.py Projet : eduardomourar/verto

    def process_html(self, node):
        ''' Replaces the node's text with the rendered html template
        when that text starts with the scratch pattern.

        Args:
            node: The possible pre node of a code block.
        '''
        content = node.text.strip()
        match = self.pattern.match(content)
        if match is None:
            return

        # Everything after the matched prefix is the scratch block.
        scratch_block = content[match.end():]
        content_hash = self.hash_content(scratch_block)
        self.update_required_images(content_hash, scratch_block)

        html_parser = HtmlParser()
        rendered = self.template.render({'hash': content_hash})
        new_node = html_parser.feed(rendered).close().get_root()

        node.tag = 'remove'
        node.text = ''
        node.append(new_node)

Exemple #32

0

Afficher le fichier

Fichier : ScratchTreeprocessor.py Projet : uccser/kordac

    def process_html(self, node):
        ''' Replaces the single code child of the given node with the
        rendered html template when that child is scratch code.

        The language is read from the code element's class attribute,
        falling back to the code text itself when there is no class.

        Args:
            node: The possible pre node of a code block.
        '''
        children = list(node)
        if len(children) != 1 or children[0].tag != 'code':
            return

        code = children[0]
        content = code.text.strip()
        language = code.attrib.get('class', content)
        language_in_content = 'class' not in code.attrib

        match = self.pattern.search(language)
        if match is None:
            return

        options = [opt for opt in match.group('options').split(':') if opt]
        if language_in_content:
            # Drop the language line that was matched inside the text.
            content = content[match.end():]

        content_blocks = [part for part in content.split('\n\n') if part]
        if 'random' in options:
            shuffle(content_blocks)
        if 'split' not in options:
            # Without 'split' everything is one block again.
            content_blocks = [reduce(lambda joined, part: joined + '\n\n' + part, content_blocks)]

        images = []
        for block in content_blocks:
            content_hash = ScratchTreeprocessor.hash_content(block)
            self.update_required_images(content_hash, block)
            images.append(content_hash)

        rendered = self.template.render({'images': images})
        new_node = HtmlParser().feed(rendered).close().get_root()

        node.tag = 'remove'
        node.text = ''
        node.append(new_node)
        node.remove(code)

Exemple #33

0

Afficher le fichier

Fichier : ConditionalProcessor.py Projet : eduardomourar/verto

    def run(self, parent, blocks):
        ''' Replaces a conditional (if / elif / else / end tags and
        the content between them) with the given html template.
        Allows for recursively defined if statements.

        Args:
            parent: The parent node of the element tree that the
                rendered conditional is appended to.
            blocks: A list of strings of the document, where the
                first block contains the opening conditional tag.
        Raises:
            TagNotMatchedError: When a condition tags does not have
                a matching start tag, or a start tag does not have
                a matching end tag.
        '''
        block = blocks.pop(0)
        context = dict()

        start_tag = self.pattern.search(block)
        start_args = start_tag.group('args')

        # elif, else or end before an if conditional
        if not tag_starts_with('if', start_args):
            if tag_starts_with('elif', start_args):
                string = 'elif'
            elif tag_starts_with('else', start_args):
                string = 'else'
            elif tag_starts_with('end', start_args):
                # Must be assigned: a bare 'end' expression here would
                # leave the tag name out of the error message.
                string = 'end'
            else:
                string = 'unrecognised'

            msg = '{} conditional found before if'.format(string)
            raise TagNotMatchedError(self.processor, block, msg)

        # Put left overs back on blocks, should be empty though
        if block[:start_tag.start()].strip() != '':
            self.parser.parseChunk(parent, block[:start_tag.start()])
        if block[start_tag.end():].strip() != '':
            blocks.insert(0, block[start_tag.end():])

        # Process if statement
        argument_values = parse_arguments(self.processor, start_args,
                                          self.arguments)
        if_expression = argument_values['condition']
        next_tag, block, content_blocks = self.get_content(blocks)
        if_content = self.parse_blocks(content_blocks)

        context['if_expression'] = if_expression
        context['if_content'] = if_content

        # Process elif statements
        elifs = OrderedDict()
        while next_tag is not None and tag_starts_with('elif',
                                                       next_tag.group('args')):
            argument_values = parse_arguments(self.processor,
                                              next_tag.group('args'),
                                              self.arguments)
            elif_expression = argument_values['condition']
            next_tag, block, content_blocks = self.get_content(blocks)
            content = self.parse_blocks(content_blocks)
            elifs[elif_expression] = content
        context['elifs'] = elifs

        # Process else statement
        has_else = next_tag is not None and tag_starts_with(
            'else', next_tag.group('args'))
        else_content = ''
        if has_else:
            # The parsed values are not used; the call is kept because
            # it presumably validates the tag's arguments (TODO confirm).
            parse_arguments(self.processor,
                            next_tag.group('args'),
                            self.arguments)
            next_tag, block, content_blocks = self.get_content(blocks)
            else_content = self.parse_blocks(content_blocks)
        context['has_else'] = has_else
        context['else_content'] = else_content

        # Whatever tag follows the last branch has to be the end tag.
        if next_tag is None or not tag_starts_with('end',
                                                   next_tag.group('args')):
            msg = 'end conditional not found'
            raise TagNotMatchedError(self.processor, block, msg)

        # Render template and compile into an element
        html_string = self.template.render(context)
        parser = HtmlParser()
        parser.feed(html_string).close()
        parent.append(parser.get_root())

Exemple #34

0

Afficher le fichier

Fichier : ConditionalProcessor.py Projet : uccser/kordac

    def run(self, parent, blocks):
        ''' Replaces a conditional (if / elif / else / end) with the
        rendered html template.

        The first block must hold the opening ``if`` tag. The content
        between consecutive conditional tags is collected with
        ``self.get_content`` and converted with ``self.parse_blocks``,
        then everything is rendered through ``self.template`` and the
        resulting element is appended to ``parent``.

        Args:
            parent: The parent node of the element tree. Any text found
                before the opening tag is parsed into it, and the
                rendered conditional is appended to it.
            blocks: A list of strings of the document. The first block
                is consumed and must contain a match for
                ``self.pattern``; further blocks are consumed while the
                conditional is read.
        Raises:
            TagNotMatchedError: When the first tag is not an ``if``
                (an elif, else, end or unrecognised tag appears first),
                or when the conditional is not closed by an end tag.
        '''
        block = blocks.pop(0)
        context = dict()

        start_tag = self.pattern.search(block)
        start_args = start_tag.group('args')

        # elif, else or end before an if conditional
        if not tag_starts_with('if', start_args):
            # Report which keyword was found; looping over the keywords
            # guarantees every branch actually sets the name (the 'end'
            # case previously left it empty).
            for keyword in ('elif', 'else', 'end'):
                if tag_starts_with(keyword, start_args):
                    string = keyword
                    break
            else:
                string = 'unrecognised'

            msg = '{} conditional found before if'.format(string)
            raise TagNotMatchedError(self.processor, block, msg)

        # Put left overs back on blocks, should be empty though
        before = block[:start_tag.start()]
        after = block[start_tag.end():]
        if before.strip() != '':
            self.parser.parseChunk(parent, before)
        if after.strip() != '':
            blocks.insert(0, after)

        # Process if statement
        argument_values = parse_arguments(self.processor, start_args, self.arguments)
        context['if_expression'] = argument_values['condition']
        next_tag, block, content_blocks = self.get_content(blocks)
        context['if_content'] = self.parse_blocks(content_blocks)

        # Process elif statements.
        # NOTE: elifs are keyed by their expression, so a repeated
        # expression overwrites the content of the earlier one.
        elifs = OrderedDict()
        while next_tag is not None and tag_starts_with('elif', next_tag.group('args')):
            argument_values = parse_arguments(self.processor, next_tag.group('args'), self.arguments)
            elif_expression = argument_values['condition']
            next_tag, block, content_blocks = self.get_content(blocks)
            elifs[elif_expression] = self.parse_blocks(content_blocks)
        context['elifs'] = elifs

        # Process else statement
        has_else = next_tag is not None and tag_starts_with('else', next_tag.group('args'))
        else_content = ''
        if has_else:
            # The parsed values are not needed for an else tag; the call
            # is kept so that any error it raises still surfaces.
            parse_arguments(self.processor, next_tag.group('args'), self.arguments)
            next_tag, block, content_blocks = self.get_content(blocks)
            else_content = self.parse_blocks(content_blocks)
        context['has_else'] = has_else
        context['else_content'] = else_content

        # Whatever tag stopped the scan must be the closing end tag
        if next_tag is None or not tag_starts_with('end', next_tag.group('args')):
            msg = 'end conditional not found'
            raise TagNotMatchedError(self.processor, block, msg)

        # Render template and compile into an element
        html_string = self.template.render(context)
        parser = HtmlParser()
        parser.feed(html_string).close()
        parent.append(parser.get_root())

Exemple #35

0

Afficher le fichier

    def run(self, parent, blocks):
        ''' Generic run method for container tags.

        Consumes the block holding the start tag, gathers every block
        up to the matching end tag (counting nested start/end pairs),
        renders the gathered content through the template and appends
        the resulting element to the parent.

        Args:
            parent: The parent node of the element tree that children
                will reside in.
            blocks: A list of strings of the document, where the
                first block tests true.

        Raises:
            ArgumentValueError: If value for a given argument is incorrect.
            TagNotMatchedError: If end tag is not found for corresponding start tag.
        '''
        current = blocks.pop(0)

        opening = self.p_start.search(current)
        closing = self.p_end.search(current)

        # An end tag with no start tag, or one that begins before the
        # start tag has finished, cannot belong to this container.
        if closing is not None and (
                opening is None or opening.end() > closing.start()):
            raise TagNotMatchedError(self.processor, current,
                                     'end tag found before start tag')

        leading = current[:opening.start()]
        trailing = current[opening.end():]

        if leading.strip():
            self.parser.parseChunk(parent, leading)
        if trailing.strip():
            blocks.insert(0, trailing)

        argument_values = parse_arguments(self.processor,
                                          opening.group('args'),
                                          self.arguments)

        collected = []
        remainder = ''
        nested_opened = 0
        nested_closed = 0

        while blocks:
            current = blocks.pop(0)
            nested = self.p_start.search(current)
            closing = self.p_end.search(current)

            # Count a nested start tag when the block has no end tag,
            # or when the start tag begins before the end tag finishes.
            if nested is not None and (
                    closing is None or nested.start() < closing.end()):
                nested_opened += 1

            if closing is not None:
                if nested_opened == nested_closed:
                    # This end tag closes the container itself.
                    collected.append(current[:closing.start()])
                    remainder = current[closing.end():]
                    break
                # Otherwise it closes a nested container; keep going.
                nested_closed += 1
                closing = None
            collected.append(current)

        collected, extra_args = self.custom_parsing(
            collected, argument_values)
        argument_values.update(extra_args)

        if remainder.strip():
            blocks.insert(0, remainder)

        if closing is None or nested_opened != nested_closed:
            raise TagNotMatchedError(self.processor, current,
                                     'no end tag found to close start tag')

        content_tree = etree.Element('content')
        self.parser.parseChunk(content_tree, blocks_to_string(collected))

        # One serialized child per line, without surrounding newlines.
        serialized = [HtmlSerializer.tostring(child)
                      for child in content_tree]
        content = '\n'.join(serialized).strip('\n')

        if not content.strip():
            message = 'content cannot be blank.'
            raise ArgumentValueError(self.processor, 'content', content,
                                     message)

        argument_values['content'] = content
        context = self.process_parameters(self.processor,
                                          self.template_parameters,
                                          argument_values)

        rendered = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(rendered).close()
        parent.append(html_parser.get_root())

Exemple #36

0

Afficher le fichier

Fichier : GenericContainerBlockProcessor.py Projet : uccser/kordac

    def run(self, parent, blocks):
        ''' Generic run method for container tags.

        Takes the block with the start tag off the front of the list,
        collects the following blocks until the matching end tag is
        reached (nested start/end pairs are counted), then renders the
        collected content with the template and appends the resulting
        element to the parent.

        Args:
            parent: The parent node of the element tree that children
                will reside in.
            blocks: A list of strings of the document, where the
                first block tests true.

        Raises:
            ArgumentValueError: If value for a given argument is incorrect.
            TagNotMatchedError: If end tag is not found for corresponding start tag.
        '''
        chunk = blocks.pop(0)

        start_match = self.p_start.search(chunk)
        end_match = self.p_end.search(chunk)

        # Reject an end tag that has no start tag, or that starts
        # before the start tag is complete.
        if end_match is not None:
            if start_match is None or start_match.end() > end_match.start():
                raise TagNotMatchedError(self.processor, chunk, 'end tag found before start tag')

        head = chunk[:start_match.start()]
        tail = chunk[start_match.end():]

        if head.strip():
            self.parser.parseChunk(parent, head)
        if tail.strip():
            blocks.insert(0, tail)

        argument_values = parse_arguments(self.processor, start_match.group('args'), self.arguments)

        inner_blocks = []
        leftover = ''
        opened = 0
        closed = 0

        while blocks:
            chunk = blocks.pop(0)
            inner_match = self.p_start.search(chunk)
            end_match = self.p_end.search(chunk)

            # A nested start tag counts when there is no end tag in the
            # block, or when it begins before the end tag finishes.
            if inner_match is not None:
                if end_match is None or inner_match.start() < end_match.end():
                    opened += 1

            if end_match is not None:
                if opened == closed:
                    # Matching end tag of this container: stop here.
                    inner_blocks.append(chunk[:end_match.start()])
                    leftover = chunk[end_match.end():]
                    break
                # End tag of a nested container: record it and move on.
                closed += 1
                end_match = None
            inner_blocks.append(chunk)

        inner_blocks, extra_args = self.custom_parsing(inner_blocks, argument_values)
        argument_values.update(extra_args)

        if leftover.strip():
            blocks.insert(0, leftover)

        if end_match is None or opened != closed:
            raise TagNotMatchedError(self.processor, chunk, 'no end tag found to close start tag')

        content_tree = etree.Element('content')
        self.parser.parseChunk(content_tree, blocks_to_string(inner_blocks))

        # Serialize each child on its own line, trimming outer newlines.
        content = '\n'.join(HtmlSerializer.tostring(node) for node in content_tree).strip('\n')

        if not content.strip():
            message = 'content cannot be blank.'
            raise ArgumentValueError(self.processor, 'content', content, message)

        argument_values['content'] = content
        context = self.process_parameters(self.processor, self.template_parameters, argument_values)

        markup = self.template.render(context)
        html_parser = HtmlParser()
        html_parser.feed(markup).close()
        parent.append(html_parser.get_root())