def check(link, parse, possible_paths=None):
    """Decide whether *link* (or a URL derived from it) is parseable.

    Without ``possible_paths`` the link is handed to ``plugins.parseable``
    unchanged.  With ``possible_paths`` a list of candidate URLs is built
    from ``parse.scheme``, ``parse.netloc``, each cumulative ``'/'``-joined
    prefix of ``possible_paths`` and ``link``; the candidates are tried in
    order and the first one without errors wins.

    :param link: the link text to check.
    :param parse: object exposing ``scheme`` and ``netloc`` attributes
        (presumably a ``urlparse`` result -- confirm against the caller).
    :param possible_paths: optional sequence of path segments used to
        build candidate URLs.
    :returns: a ``(url, ok)`` tuple.  ``ok`` is True only when
        ``plugins.parseable`` reported no errors for ``url``.  When ``ok``
        is False, ``url`` is None if an ``'invalid'`` / ``'invalid_link'``
        error was recorded, otherwise the URL that was tested (the last
        candidate when several were tried).
    """
    # Error codes (second element of each error entry) that discard the URL.
    fatal_codes = ('invalid', 'invalid_link')

    if not possible_paths:
        problems = plugins.parseable(link)
        if not problems:
            return link, True
        codes = [problem[1] for problem in problems]
        if any(code in fatal_codes for code in codes):
            return None, False
        return link, False

    # One candidate per cumulative prefix: [a], [a, b], [a, b, c], ...
    depths = range(1, len(possible_paths) + 1)
    prefixes = ['/'.join(possible_paths[:depth]) for depth in depths]
    candidates = [
        '{scheme}://{netloc}{path}{link}'.format(
            scheme=parse.scheme, netloc=parse.netloc, path=prefix, link=link)
        for prefix in prefixes
    ]

    # ``possible_paths`` is non-empty here, so the loop body runs at least
    # once and ``outcome`` is always bound before the final return.
    for candidate in candidates:
        problems = plugins.parseable(candidate)
        if not problems:
            return candidate, True

        outcome = candidate
        for problem in problems:
            code = problem[1]
            if code in fatal_codes:
                outcome = None
            elif code == 'not_parseable':
                # Stop scanning: later errors no longer affect the outcome.
                break

        summary = ' '.join(
            ['{0}{1}'.format(problem[0], problem[1]) for problem in problems])
        logging.debug("{errors}, URL: {link}".format(link=link,
                                                     errors=summary))

    logging.debug("Links checked:\n {links}".format(
        links='\t'.join(candidates)))
    return outcome, False
def test_parseable_fail(self):
    """Every link in the list below must make ``parseable`` report errors."""
    # NOTE(review): the ``mailto:`` entry looks like it was mangled by an
    # e-mail obfuscation filter -- confirm the intended address.
    unparseable_links = [
        'test.js', 'test.css', 'test.zip', 'test.jpg',
        '_images/schema.png', 'javascript:void(0)',
        'http://rdc-blga.appspot.com/html/_images/schema.png',
        'mailto:[email protected]',
        'http://rdc-blga.appspot.com/html',
        "http://rdc-python.googlecode.com/files/eclipse_templates.zip",
    ]
    for link in unparseable_links:
        errors = parseable(link)
        # An empty error list would mean the link was accepted as parseable.
        self.assertNotEqual(
            [], errors,
            "{link} is not parseable, but no errors were found".format(
                link=link))
def test_parseable_ok(self):
    """``parseable`` must return an empty error list for these URLs."""
    parseable_links = [u'http://sphinx.pocoo.org/', u'http://python.org/']
    for link in parseable_links:
        errors = parseable(link)
        # Name the offending URL in the failure message so a failing
        # iteration of the loop can be identified.
        self.assertListEqual(
            errors, [],
            "{link} should be parseable, but errors were found: "
            "{errors}".format(link=link, errors=errors))