def _check_stream_has_not_ended(self, tagname=None):
    """
    Raise ValidationError() if there is nothing left in the stream.

    When ``tagname`` is given (and truthy), the error message names the tag
    that was being processed; otherwise a generic message is used. Nothing
    happens while ``self._stream`` is still non-empty.
    """
    if self._stream:
        return
    if not tagname:
        raise ValidationError('stream ended unexpectedly')
    raise ValidationError('">" expected for closing %s tag' % tagname)
def _parse_tag_attrs(self, tagname):
    """
    Parse the attributes of the open tag ``tagname`` from the stream.

    Attributes are consumed from ``self._stream`` until it starts with ">"
    or "/>". Each one is stored as a ``(name, value)`` tuple; ``value`` is
    None when no "=" token follows the attribute name.

    Returns a dict holding the list of tuples under the 'attributes' key.

    Raises ValidationError() when no attribute name can be read, or when an
    unquoted value is not fully matched by ``attr_value_re``.
    """
    attributes = []
    result = {'attributes': attributes}

    while not (self._stream.startswith('>') or
               self._stream.startswith('/>')):
        # We start reading an attribute name and then process it
        # looking for a possible value in the rhs of the equality
        name, self._stream = read_tok(attr_name_re, self._stream)
        if not name:
            msg = 'expect attribute on open "%s" tag' % tagname
            raise ValidationError(msg)

        # Now we look for the "=" sign to decide if we must check the
        # rhs of the attribute declaration
        op, self._stream = read_tok(eq_sign_re, self._stream)
        if not op:
            # Attribute without "=": it carries no value
            value = None
        elif self._stream.startswith('"'):
            # Quoted value: taken as read by string_re, with no further
            # check against attr_value_re
            value, self._stream = read_tok(string_re, self._stream)
        else:
            # Unquoted value: everything up to the next tag separator
            value, sep, self._stream = re_partition(
                tag_separator_re, self._stream)
            # A separator that is not pure whitespace is put back (stripped)
            # at the front of the stream so the loop condition can see it
            if sep.strip():
                self._stream = sep.strip() + self._stream
            # NOTE(review): this check is assumed to belong to the unquoted
            # branch only (value may be None in the first branch) -- confirm
            if not attr_value_re.fullmatch(value):
                msg = 'invalid tag attribute: %r' % value
                raise ValidationError(msg)
        attributes.append((name, value))

    # The loop only exits when the stream starts with ">" or "/>", so the
    # stream is not empty at this point
    self._check_stream_has_not_ended(tagname=tagname)
    # NOTE(review): when the stream starts with "/>" this drops only the "/"
    # and leaves ">" in the stream -- presumably handled by the caller; confirm
    self._stream = self._stream[1:]  # strip the first ">" character
    return result
def validate_tag_name(tagname):
    """
    Check ``tagname`` against ``tagname_re``.

    Returns None when the whole name matches and raises ValidationError()
    otherwise.
    """
    matched = tagname_re.fullmatch(tagname)
    if matched is not None:
        return
    raise ValidationError('invalid tag name: %r' % tagname)
def _parse_endtag(self):
    """
    Consume a closing tag ("</name>") from the stream and close its open tag.

    Entries are popped from ``self._open_tags`` until one whose ``tag``
    equals the closing name is found. Popped entries flagged as
    ``self_closing`` are discarded silently along the way.

    Returns CLOSE_TAG.

    Raises ValidationError() when:
      * the closing name is not fully matched by ``tagname_re``;
      * the ">" that terminates the closing tag is missing;
      * a popped entry is neither the tag being closed nor self closing;
      * there is no open tag left to close.
    """
    tag, sep, self._stream = self._stream.partition('>')
    tagname = tag[2:]  # remove "</"
    if not tagname_re.fullmatch(tagname):
        raise ValidationError('invalid closing tag name: %s' % tagname)

    # str.partition() gives an empty separator when ">" is not found. Without
    # this check an unterminated "</name" at the end of the input would be
    # accepted as a well formed closing tag.
    if not sep:
        raise ValidationError('">" expected for closing %s tag' % tagname)

    while self._open_tags:
        last = self._open_tags.pop()
        if last.tag == tagname:
            break
        elif last.self_closing:
            continue
        raise ValidationError('forgot to close tag: %r' % last.tag)
    else:
        # The stack ran out (or was empty) without finding a matching tag
        raise ValidationError(
            'closing tag </%s> without an open tag.' % tagname
        )
    return CLOSE_TAG
def strict_validator(self):
    """
    Strict validation method.

    We just call html5lib parser with strict=True on ``self.data``. Error
    messages are awful, and it complains about many small errors, so it can
    be annoying.

    Raises ValidationError() carrying the parser message when parsing fails.
    The original ParseError is kept as the ``__cause__`` of the new
    exception.
    """
    strict_parser = HTMLParser(strict=True)
    try:
        strict_parser.parse(self.data)
    except ParseError as ex:
        # Explicit chaining marks the parser error as the direct cause
        raise ValidationError(str(ex)) from ex
def _check_url_worker(url, method=None, post=None, login=None,
                      login_required=False, codes=None, html5=False,
                      html5_validator=None, xhtml=False, client=None):
    """
    Request ``url`` through a client and validate the response.

    Args:
        url: address passed to ``client.open()``.
        method: HTTP method name. Defaults to 'POST' when ``post`` is
            truthy and to 'GET' otherwise.
        post: payload; when truthy it is passed to ``client.open()`` as an
            extra positional argument.
        login: passed to ``_get_valid_client()`` together with ``client``.
        login_required: accepted but not used by this function.
        codes: accepted status code (int) or collection of codes. None
            means only 200 is accepted (previously None failed with a
            TypeError at the membership test).
        html5: if true, the response content is also checked with the
            html5 validator.
        html5_validator: passed to ``_get_html5_validator()`` to obtain the
            callable that checks the response content.
        xhtml: accepted but not used by this function.
        client: passed to ``_get_valid_client()``.

    Raises ValidationError() if the status code is not accepted or if the
    html5 validator rejects the content. Both messages are prefixed with
    the url.
    """
    client = _get_valid_client(client, login)

    # A request that carries a payload is a POST unless told otherwise
    if post and method is None:
        method = 'POST'
    method = method or 'GET'

    # Fetch data from server object
    args = (post, ) if post else ()
    response = client.open(url, method, *args)

    # Check response code
    if codes is None:
        codes = (200,)
    elif isinstance(codes, int):
        codes = [codes]
    status_code = response.status_code
    if status_code not in codes:
        msg = '%s: received invalid status code: %s' % (url, status_code)
        raise ValidationError(msg)

    # Now we check if the content HTML data validates
    if html5:
        html5_validator = _get_html5_validator(html5_validator)
        try:
            html5_validator(response.content)
        except ValidationError as ex:
            # Re-raise with the url in the message, keeping the original
            # error as the explicit cause
            raise ValidationError('%s: %s' % (url, ex)) from ex
def _parse_doctype(self):
    """
    Consume a "<!DOCTYPE ...>" declaration from the stream.

    Everything up to the first ">" is taken as the declaration. The text
    after the first 10 characters ("<!DOCTYPE ") is stripped and handed to
    ``DOCTYPE.create_valid()``, whose result is returned.

    Raises ValidationError() if the ">" is missing or if the 10th character
    of the declaration is not a space. This includes declarations that are
    too short to have a 10th character, such as "<!DOCTYPE>".
    """
    data, sep, self._stream = self._stream.partition('>')

    # A slice is used instead of data[9]: for declarations shorter than 10
    # characters (including the empty one) it yields '' and the declaration
    # is rejected, where indexing would crash with an IndexError.
    if (not sep) or (data[9:10] != ' '):
        raise ValidationError('invalid doctype declaration: %s>' % data)

    data = data[10:]  # remove "<!DOCTYPE "
    return DOCTYPE.create_valid(data.strip())
def _parse_comments(self):
    """
    Consume an HTML comment from the stream and return it as a Comment.

    The first 4 characters of the stream are skipped and everything up to
    the next "-->" becomes the comment data. The stream is left with the
    text that follows the "-->".

    Raises ValidationError() if no "-->" is found.
    """
    remaining = self._stream[4:]
    data, closer, self._stream = remaining.partition('-->')
    if closer:
        return Comment(data)
    raise ValidationError('expect --> to close a comment.')
def validate(self):
    """
    Raise ValidationError() if ``self.tag`` is not fully matched by
    ``tagname_re``.
    """
    tag = self.tag
    if tagname_re.fullmatch(tag) is not None:
        return
    raise ValidationError('invalid tag name: %r' % tag)
def validate(self):
    """
    Raise ValidationError() if "-->" appears inside the comment itself.
    """
    closes_twice = '-->' in self
    if closes_twice:
        msg = 'invalid comment: closing twice with "-->"'
        raise ValidationError(msg)
def validate(self):
    """
    Raise ValidationError() unless the doctype compares equal to 'html'.
    """
    if self != 'html':
        msg = ('please use the <!DOCTYPE html> declaration '
               'for html5 documents.')
        raise ValidationError(msg)
def validate(self):
    """
    Raise ValidationError() if ``self.data`` contains "<" or ">".
    """
    if any(mark in self.data for mark in ('<', '>')):
        raise ValidationError('forgot to escape <> inside text')