def clean(self):
    """Validate the form and check that the submitted URL can be fetched.

    Runs the parent ``clean()`` first.  If any errors are already recorded
    the cleaned data is returned untouched.  Otherwise ``url_exists`` is
    called on the ``url`` value; on failure its message is attached to the
    ``url`` field.  In both cases the results of the check are stored in the
    cleaned data under ``is_broken``, ``last_check``, ``format`` and ``size``.

    Returns:
        The cleaned-data dict produced by the parent ``clean()``, extended
        with the keys listed above when the URL check was performed.
    """
    cleaned = super(CheckedFileForm, self).clean()
    if self._errors:
        # Earlier validation already failed, so skip the URL check.
        return cleaned

    # Check the URL is a valid URL and exists
    exists, fmt, size, error_msg = url_exists(cleaned['url'])
    if not exists:
        # Wrap the message in the form's error_class instead of a bare list
        # so this entry is the same type as the other field error lists.
        self._errors['url'] = self.error_class([error_msg])

    # TODO: Consider uncommenting this
    # if fmt == 'HTML':
    #     self._errors['url'] = \
    #         [_("This appears to be a web page and not a data file")]

    # NOTE(review): is_broken is set to False even when the check above
    # failed; presumably harmless because the form then carries an error on
    # 'url' -- confirm this is intended.
    cleaned['is_broken'] = False
    cleaned['last_check'] = datetime.datetime.now()
    cleaned['format'] = fmt
    cleaned['size'] = size

    return cleaned
def test_url_exists_ok(self):
    # A reachable web page must be reported as existing, with its format
    # detected as HTML and no error message.
    url = 'https://google.com'
    found, detected_format, _size, message = url_exists(url)

    assert found
    assert detected_format == 'HTML', detected_format
    assert not message
def test_url_missing_proto(self):
    # A URL given without a scheme must be rejected: no format is reported
    # and the error message mentions 'https'.
    url = 'data.gov.uk'
    found, detected_format, _size, message = url_exists(url)

    assert not found
    assert detected_format == ''
    assert 'https' in message
def test_url_does_not_exist(self):
    # A host that cannot be reached must be reported as missing, with an
    # empty format and a 'Failed to connect' error message.
    url = 'https://12345.12345.12345.org'
    found, detected_format, _size, message = url_exists(url)

    assert not found
    assert detected_format == ''
    assert 'Failed to connect' in message
def test_url_exists_csv(self):
    # A reachable CSV file must be reported as existing, with its format
    # detected as CSV and no error message.
    url = 'https://data.gov.uk/data/site-usage/data_all.csv'
    found, detected_format, _size, message = url_exists(url)

    assert found
    assert detected_format == 'CSV', detected_format
    assert not message