Ejemplo n.º 1
0
 def _check_stream_has_not_ended(self, tagname=None):
     if not self._stream:
         if tagname:
             msg = '">" expected for closing %s tag' % tagname
             raise ValidationError(msg)
         else:
             raise ValidationError('stream ended unexpectedly')
Ejemplo n.º 2
0
    def _parse_tag_attrs(self, tagname):
        attributes = []
        result = {'attributes': attributes}
        while not (self._stream.startswith('>') or
                       self._stream.startswith('/>')):
            # We start reading an attribute name and then process it
            # looking for a possible value in the rhs of the equality
            name, self._stream = read_tok(attr_name_re, self._stream)
            if not name:
                msg = 'expect attribute on open "%s" tag' % tagname
                raise ValidationError(msg)

            # Now we look for the "=" sign to decide if we must check the
            # rhs of the attribute declaration
            op, self._stream = read_tok(eq_sign_re, self._stream)
            if not op:
                value = None
            elif self._stream.startswith('"'):
                value, self._stream = read_tok(string_re, self._stream)
            else:
                value, sep, self._stream = re_partition(
                    tag_separator_re, self._stream)
                if sep.strip():
                    self._stream = sep.strip() + self._stream
                if not attr_value_re.fullmatch(value):
                    msg = 'invalid tag attribute: %r' % value
                    raise ValidationError(msg)

            attributes.append((name, value))
            self._check_stream_has_not_ended(tagname=tagname)

        self._stream = self._stream[1:]  # strip the first ">" character
        return result
Ejemplo n.º 3
0
def validate_tag_name(tagname):
    """
    Check that the whole of tagname matches ``tagname_re``.

    Returns None for a matching name and raises ValidationError() otherwise.
    """

    if tagname_re.fullmatch(tagname) is not None:
        return
    raise ValidationError('invalid tag name: %r' % tagname)
Ejemplo n.º 4
0
    def _parse_endtag(self):
        """
        Consume a closing tag ("</name>") and match it against the open tags.

        Everything up to the first ">" is taken from the stream and the
        leading "</" is removed to obtain the tag name. Entries are then
        popped from ``self._open_tags`` until one whose ``tag`` equals that
        name is found; popped entries flagged ``self_closing`` are skipped.

        Returns CLOSE_TAG on success.

        Raises ValidationError() when the name does not match
        ``tagname_re``, when the closing ">" is missing, when a popped tag
        is neither the one being closed nor self closing, or when no open
        tag matches at all.
        """
        tag, sep, self._stream = self._stream.partition('>')
        tagname = tag[2:]  # remove "</"
        if not tagname_re.fullmatch(tagname):
            raise ValidationError('invalid closing tag name: %s' % tagname)
        if not sep:
            # partition() returns an empty separator when no ">" exists; the
            # stream ended in the middle of the closing tag, which must not
            # be accepted as a well-formed "</name>".
            raise ValidationError('">" expected for closing %s tag' % tagname)

        while self._open_tags:
            last = self._open_tags.pop()
            if last.tag == tagname:
                break
            elif last.self_closing:
                continue
            raise ValidationError('forgot to close tag: %r' % last.tag)
        else:
            # The stack was exhausted without finding a matching open tag.
            raise ValidationError(
                'closing tag </%s> without an open tag.' % tagname
            )
        return CLOSE_TAG
Ejemplo n.º 5
0
    def strict_validator(self):
        """
        Validate ``self.data`` with a strict HTML parser.

        The data is handed to an ``HTMLParser`` created with strict=True.
        Any ``ParseError`` it raises is re-raised as ValidationError()
        carrying the same text. Strict parsing complains about many small
        problems and its messages are terse, so expect noisy output.
        """

        parser = HTMLParser(strict=True)
        try:
            parser.parse(self.data)
        except ParseError as error:
            message = str(error)
            raise ValidationError(message)
Ejemplo n.º 6
0
def _check_url_worker(url,
                      method=None,
                      post=None,
                      login=None,
                      login_required=False,
                      codes=None,
                      html5=False,
                      html5_validator=None,
                      xhtml=False,
                      client=None):
    """
    Request ``url`` through a client and validate the response.

    The client comes from ``_get_valid_client(client, login)``. The HTTP
    method defaults to 'POST' when ``post`` is truthy and to 'GET'
    otherwise; a truthy ``post`` is forwarded to ``client.open`` as an extra
    positional argument.

    ``codes`` is the accepted status code (an int) or a collection of
    accepted codes. When it is None only 200 is accepted.
    NOTE(review): the None default previously crashed with TypeError in the
    membership test; treating it as "expect 200" is an assumption to
    confirm against the callers.

    When ``html5`` is truthy the response content is passed to the
    validator returned by ``_get_html5_validator(html5_validator)``.

    ``login_required`` and ``xhtml`` are accepted but not used here.

    Raises ValidationError() for an unexpected status code or when the
    HTML5 validator rejects the content; both messages are prefixed with
    the url.
    """

    client = _get_valid_client(client, login)

    # Decide which HTTP method to use
    if post and method is None:
        method = 'POST'
    method = method or 'GET'

    # Fetch data from server object
    args = (post, ) if post else ()
    response = client.open(url, method, *args)

    # Check response code
    if codes is None:
        codes = (200, )
    elif isinstance(codes, int):
        codes = [codes]
    status_code = response.status_code
    if status_code not in codes:
        msg = '%s: received invalid status code: %s' % (url, status_code)
        raise ValidationError(msg)

    # Now we check if the content HTML data validates
    if html5:
        html5_validator = _get_html5_validator(html5_validator)
        try:
            html5_validator(response.content)
        except ValidationError as ex:
            # Keep the original error attached as the explicit cause.
            raise ValidationError('%s: %s' % (url, ex)) from ex
Ejemplo n.º 7
0
 def _parse_doctype(self):
     """
     Consume a doctype declaration from the stream.

     Everything up to the first ">" is taken from the stream. The character
     at index 9 (right after a 9-character prefix such as "<!DOCTYPE") must
     be a space; the text after it is stripped and handed to
     ``DOCTYPE.create_valid``, whose result is returned.

     Raises ValidationError() when the closing ">" is missing or the
     declaration is too short or lacks the space at index 9.
     """
     data, sep, self._stream = self._stream.partition('>')
     # Slice rather than index: for a declaration shorter than 10 characters
     # (e.g. "<!DOCTYPE>") data[9] would raise IndexError, while the slice
     # yields '' and the input is reported as a validation error.
     if (not sep) or (data[9:10] != ' '):
         raise ValidationError('invalid doctype declaration: %s>' % data)
     data = data[10:]  # remove "<!DOCTYPE "
     return DOCTYPE.create_valid(data.strip())
Ejemplo n.º 8
0
 def _parse_comments(self):
     """
     Consume a comment from the stream and return it as a Comment.

     The first four characters of the stream are skipped and the text up to
     the next "-->" becomes the comment body. The stream is advanced past
     the terminator (or emptied when there is none) before the check runs.

     Raises ValidationError() when no "-->" terminator is found.
     """
     remainder = self._stream[4:]
     body, terminator, self._stream = remainder.partition('-->')
     if terminator:
         return Comment(body)
     raise ValidationError('expect --> to close a comment.')
Ejemplo n.º 9
0
 def validate(self):
     """
     Raise ValidationError() unless ``self.tag`` fully matches tagname_re.
     """
     match = tagname_re.fullmatch(self.tag)
     if match is None:
         message = 'invalid tag name: %r' % self.tag
         raise ValidationError(message)
Ejemplo n.º 10
0
 def validate(self):
     """
     Raise ValidationError() if this object contains the "-->" sequence.
     """
     if '-->' not in self:
         return
     raise ValidationError('invalid comment: closing twice with "-->"')
Ejemplo n.º 11
0
 def validate(self):
     """
     Raise ValidationError() unless this object compares equal to 'html'.
     """
     if self != 'html':
         message = ('please use the <!DOCTYPE html> declaration '
                    'for html5 documents.')
         raise ValidationError(message)
Ejemplo n.º 12
0
 def validate(self):
     """
     Raise ValidationError() if ``self.data`` contains "<" or ">".
     """
     if '<' not in self.data and '>' not in self.data:
         return
     raise ValidationError('forgot to escape <> inside text')