def scan(hostname: str, site_id: int, scan_id: int):
    """Run every analyzer test against ``hostname`` and store the results.

    The scan is moved to STATE_STARTED as soon as the task begins. Ordinary
    failures do not raise: a celery time limit closes the scan as
    STATE_ABORTED, any other ``Exception`` closes it as STATE_FAILED with the
    exception's ``repr`` as the error. ``SystemExit`` and ``KeyboardInterrupt``
    are deliberately left to propagate.

    Args:
        hostname: host whose resources are retrieved and tested.
        site_id: site identifier passed through to insert_test_result().
        scan_id: identifier of the scan whose state and results are updated.
    """
    try:
        # Once celery kicks off the task, let's update the scan state from PENDING to STARTED
        update_scan_state(scan_id, STATE_STARTED)

        # Attempt to retrieve all the resources
        reqs = retrieve_all(hostname)

        # If we can't connect at all, let's abort the test
        if reqs['responses']['auto'] is None:
            update_scan_state(scan_id, STATE_FAILED, error='site down')
            return

        # Execute each test, replacing the underscores in the function name with dashes in the test name
        for test in httpobs.scanner.analyzer.tests:
            # TODO: Get overridden expectations
            insert_test_result(site_id, scan_id, test.__name__.replace('_', '-'), test(reqs))

    # catch the celery timeout, which will almost certainly occur in retrieve_all()
    except (SoftTimeLimitExceeded, TimeLimitExceeded):
        update_scan_state(scan_id, STATE_ABORTED, error='site unresponsive')

    # Exception (not a bare except) so interpreter-exit signals are not swallowed and misreported as scan failures
    except Exception as e:  # TODO: have more specific error messages
        # Print the exception to stdout if we're in dev
        if 'HTTPOBS_DEV' in environ:
            import traceback
            print('Error detected in: ' + hostname)
            traceback.print_exc()

        # If we are unsuccessful, close out the scan in the database
        update_scan_state(scan_id, STATE_FAILED, error=repr(e))
def test_retrieve_mozilla(self):
    """Retrieve mozilla.org and check the shape and contents of the result.

    NOTE(review): the expected values (redirect count, final URL, which
    resources exist) describe the live site, so this presumably needs
    network access and may drift if the site changes.
    """
    reqs = retrieve_all('mozilla.org')

    # Various things we know about mozilla.org
    self.assertIsNotNone(reqs['resources']['/'])
    self.assertIsNotNone(reqs['resources']['/contribute.json'])
    self.assertIsNotNone(reqs['resources']['/robots.txt'])
    self.assertIsNone(reqs['resources']['/clientaccesspolicy.xml'])
    self.assertIsNone(reqs['resources']['/crossdomain.xml'])

    # Every response slot and the session must be populated with requests objects
    self.assertIsInstance(reqs['responses']['auto'], requests.Response)
    self.assertIsInstance(reqs['responses']['cors'], requests.Response)
    self.assertIsInstance(reqs['responses']['http'], requests.Response)
    self.assertIsInstance(reqs['responses']['https'], requests.Response)
    self.assertIsInstance(reqs['session'], requests.Session)

    # assertEqual, not the assertEquals alias: the alias was removed in Python 3.12
    self.assertEqual(reqs['hostname'], 'mozilla.org')

    # Only the first nine characters are compared, so any charset suffix is ignored
    self.assertEqual('text/html', reqs['responses']['auto'].headers['Content-Type'][0:9])
    self.assertEqual(2, len(reqs['responses']['auto'].history))
    self.assertEqual(200, reqs['responses']['auto'].status_code)
    self.assertEqual('https://www.mozilla.org/en-US/', reqs['responses']['auto'].url)
def test_retrieve_non_existent_domain(self):
    """retrieve_all() on an unresolvable host returns an empty result skeleton."""
    # 223 random lowercase letters under .net; presumably chosen so the name can never resolve
    domain = ''.join(random.choice(string.ascii_lowercase) for _ in range(223)) + '.net'
    reqs = retrieve_all(domain)

    # Nothing could be fetched, so every response slot and the session are None
    self.assertIsNone(reqs['responses']['auto'])
    self.assertIsNone(reqs['responses']['cors'])
    self.assertIsNone(reqs['responses']['http'])
    self.assertIsNone(reqs['responses']['https'])
    self.assertIsNone(reqs['session'])

    # assertEqual, not the assertEquals alias: the alias was removed in Python 3.12
    self.assertEqual(domain, reqs['hostname'])
    self.assertEqual({}, reqs['resources'])
def scan(hostname: str, site_id: int, scan_id: int):
    """Run every test against ``hostname`` and store the results for ``scan_id``.

    The scan is moved to STATE_RUNNING as soon as the task begins. A soft time
    limit closes it as STATE_ABORTED; hard time limits and worker loss/shutdown
    exceptions are re-raised untouched; an IOError is reported on stderr without
    touching the scan state; any other ``Exception`` closes the scan as
    STATE_FAILED with the exception's ``repr`` as the error.

    Args:
        hostname: host whose resources are retrieved and tested.
        site_id: site identifier passed through to insert_test_results().
        scan_id: identifier of the scan whose state and results are updated.
    """
    try:
        # Once celery kicks off the task, let's update the scan state from PENDING to RUNNING
        update_scan_state(scan_id, STATE_RUNNING)

        # Get the site's cookies and headers
        headers = select_site_headers(hostname)

        # Attempt to retrieve all the resources
        reqs = retrieve_all(hostname, cookies=headers['cookies'], headers=headers['headers'])

        # If we can't connect at all, let's abort the test
        if reqs['responses']['auto'] is None:
            update_scan_state(scan_id, STATE_FAILED, error='site down')
            return

        # Execute each test and store the results together with the sanitized
        # response headers and the status code of the 'auto' response
        # TODO: Get overridden expectations
        insert_test_results(site_id,
                            scan_id,
                            [test(reqs) for test in tests],
                            sanitize_headers(reqs['responses']['auto'].headers),
                            reqs['responses']['auto'].status_code)

    # catch the celery timeout, which will almost certainly occur in retrieve_all()
    except SoftTimeLimitExceeded:
        update_scan_state(scan_id, STATE_ABORTED, error='site unresponsive')

    # these must reach the caller unchanged, so keep them out of the generic handler below
    except (TimeLimitExceeded, WorkerLostError, WorkerShutdown, WorkerTerminate):
        raise

    # the database is down, oh no!
    except IOError:
        print('database down, aborting scan on {hostname}'.format(hostname=hostname), file=sys.stderr)

    # Exception (not a bare except) so interpreter-exit signals are not swallowed and misreported as scan failures
    except Exception as e:  # TODO: have more specific error messages
        # If we are unsuccessful, close out the scan in the database
        update_scan_state(scan_id, STATE_FAILED, error=repr(e))

        # Print the exception to stderr if we're in dev
        if DEVELOPMENT_MODE:
            import traceback
            # Message and traceback both go to stderr so they stay together in the logs
            print('Error detected in scan for : ' + hostname, file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
def test_retrieve_mozilla(self):
    """Retrieve mozilla.org and check the shape and contents of the result.

    NOTE(review): the expected values (redirect count, final URL, which
    resources exist) describe the live site, so this presumably needs
    network access and may drift if the site changes.
    """
    reqs = retrieve_all('mozilla.org')

    # Various things we know about mozilla.org
    self.assertIsNotNone(reqs['resources']['/'])
    self.assertIsNotNone(reqs['resources']['/contribute.json'])
    self.assertIsNotNone(reqs['resources']['/robots.txt'])
    self.assertIsNone(reqs['resources']['/clientaccesspolicy.xml'])
    self.assertIsNone(reqs['resources']['/crossdomain.xml'])

    # Every response slot and the session must be populated with requests objects
    self.assertIsInstance(reqs['responses']['auto'], requests.Response)
    self.assertIsInstance(reqs['responses']['cors'], requests.Response)
    self.assertIsInstance(reqs['responses']['http'], requests.Response)
    self.assertIsInstance(reqs['responses']['https'], requests.Response)
    self.assertIsInstance(reqs['session'], requests.Session)

    # assertEqual, not the assertEquals alias: the alias was removed in Python 3.12
    self.assertEqual(reqs['hostname'], 'mozilla.org')

    # Only the first nine characters are compared, so any charset suffix is ignored
    self.assertEqual('text/html', reqs['responses']['auto'].headers['Content-Type'][0:9])
    self.assertEqual(2, len(reqs['responses']['auto'].history))
    self.assertEqual(200, reqs['responses']['auto'].status_code)
    self.assertEqual('https://www.mozilla.org/en-US/', reqs['responses']['auto'].url)
def test_retrieve_invalid_cert(self):
    """The 'auto' response for expired.badssl.com must not be flagged as verified."""
    retrieved = retrieve_all('expired.badssl.com')
    auto_response = retrieved['responses']['auto']

    self.assertFalse(auto_response.verified)
def scan(hostname, **kwargs):
    """Run an Observatory scan without any database/redis backing.

    Because this is not a public API and the security concerns are lower,
    arbitrary ports and paths may be scanned. Localhost scans are always
    enabled as a side effect of calling this function.

    Args:
        hostname (str): domain name for host to be scanned

    Kwargs (forwarded unchanged to retrieve_all()):
        http_port (int): port to scan for HTTP, instead of 80
        https_port (int): port to be scanned for HTTPS, instead of 443
        path (str): path to scan, instead of "/"
        verify (bool): whether to enable or disable certificate verification,
            enabled by default. This can allow tested sites to pass the HSTS
            and HPKP tests, even with self-signed certificates.
        cookies (dict): Cookies sent to the system being scanned. Matches the
            requests cookie dict.
        headers (dict): HTTP headers sent to the system being scanned. Format
            matches the requests headers dict.

    Returns:
        ``{'error': 'site down'}`` when no 'auto' response could be retrieved.
        Otherwise a dict representing the analyze (scan) and getScanResults
        (tests) API calls. Example:

        {
            'scan': {
                'grade': 'A'
                ...
            },
            'tests': {
                'content-security-policy': {
                    'pass': True
                    ...
                }
            }
        }
    """
    # Always allow localhost scans when run in this way
    httpobs.conf.SCANNER_ALLOW_LOCALHOST = True

    # Attempt to retrieve all the resources, not capturing exceptions
    reqs = retrieve_all(hostname, **kwargs)

    # If we can't connect at all, let's abort the test
    if reqs['responses']['auto'] is None:
        return {'error': 'site down'}

    # Run every test first, then attach a description to each result
    results = []
    for test in tests:
        results.append(test(reqs))
    for outcome in results:
        outcome['score_description'] = get_score_description(outcome['result'])

    # The score starts at 100 and is shifted by each result's modifier (0 when absent)
    total_modifier = sum(outcome.get('score_modifier', 0) for outcome in results)
    grades = get_grade_and_likelihood_for_score(100 + total_modifier)
    tests_passed = sum(1 for outcome in results if outcome.get('pass'))

    summary = {
        'grade': grades[1],
        'likelihood_indicator': grades[2],
        'response_headers': dict(reqs['responses']['auto'].headers),
        'score': grades[0],
        'tests_failed': NUM_TESTS - tests_passed,
        'tests_passed': tests_passed,
        'tests_quantity': NUM_TESTS,
    }

    # Key each result by its test name; the name is removed from the result itself
    by_name = {}
    for outcome in results:
        by_name[outcome.pop('name')] = outcome

    return {'scan': summary, 'tests': by_name}
def scan(hostname, **kwargs):
    """Run an Observatory scan without any database/redis backing.

    Because this is not a public API and the security concerns are lower,
    arbitrary ports and paths may be scanned. Localhost scans are always
    enabled as a side effect of calling this function.

    Args:
        hostname (str): domain name for host to be scanned. Must not include
            protocol (http://, https://) or port number (:80).

    Kwargs (forwarded unchanged to retrieve_all()):
        http_port (int): port to scan for HTTP, instead of 80
        https_port (int): port to be scanned for HTTPS, instead of 443
        path (str): path to scan, instead of "/"
        verify (bool): whether to enable or disable certificate verification,
            enabled by default. This can allow tested sites to pass the HSTS
            and HPKP tests, even with self-signed certificates.
        cookies (dict): Cookies sent to the system being scanned. Matches the
            requests cookie dict.
        headers (dict): HTTP headers sent to the system being scanned. Format
            matches the requests headers dict.

    Returns:
        ``{'error': 'site down'}`` when no 'auto' response could be retrieved.
        Otherwise a dict representing the analyze (scan) and getScanResults
        (tests) API calls. Example:

        {
            'scan': {
                'grade': 'A'
                ...
            },
            'tests': {
                'content-security-policy': {
                    'pass': True
                    ...
                }
            }
        }
    """
    # Always allow localhost scans when run in this way
    httpobs.conf.SCANNER_ALLOW_LOCALHOST = True

    # Attempt to retrieve all the resources, not capturing exceptions
    reqs = retrieve_all(hostname, **kwargs)

    # If we can't connect at all, let's abort the test
    if reqs['responses']['auto'] is None:
        return {'error': 'site down'}

    # Run every test first, then attach a description to each result
    results = []
    for test in tests:
        results.append(test(reqs))
    for outcome in results:
        outcome['score_description'] = get_score_description(outcome['result'])

    # The score starts at 100 and is shifted by each result's modifier (0 when absent)
    total_modifier = sum(outcome.get('score_modifier', 0) for outcome in results)
    grades = get_grade_and_likelihood_for_score(100 + total_modifier)
    tests_passed = sum(1 for outcome in results if outcome.get('pass'))

    summary = {
        'grade': grades[1],
        'likelihood_indicator': grades[2],
        'response_headers': dict(reqs['responses']['auto'].headers),
        'score': grades[0],
        'tests_failed': NUM_TESTS - tests_passed,
        'tests_passed': tests_passed,
        'tests_quantity': NUM_TESTS,
    }

    # Key each result by its test name; the name is removed from the result itself
    by_name = {}
    for outcome in results:
        by_name[outcome.pop('name')] = outcome

    return {'scan': summary, 'tests': by_name}
def scan(hostname, **kwargs):
    """Performs an Observatory scan, but doesn't require any database/redis backing.

    Given the lowered security concerns due to not being a public API, you can
    use this to scan arbitrary ports and paths. Localhost scans are always
    enabled as a side effect of calling this function.

    Args:
        hostname (str): domain name for host to be scanned. Must not include
            protocol (http://, https://) or port number (:80).

    Kwargs (forwarded unchanged to retrieve_all()):
        http_port (int): port to scan for HTTP, instead of 80
        https_port (int): port to be scanned for HTTPS, instead of 443
        path (str): path to scan, instead of "/"
        verify (bool): whether to enable or disable certificate verification,
            enabled by default. This can allow tested sites to pass the HSTS
            and HPKP tests, even with self-signed certificates.
        cookies (dict): Cookies sent to the system being scanned. Matches the
            requests cookie dict.
        headers (dict): HTTP headers sent to the system being scanned. Format
            matches the requests headers dict.

    Returns:
        ``{'error': 'site down'}`` when no 'auto' response could be retrieved.
        Otherwise a dict representing the analyze (scan) and getScanResults
        (tests) API calls. Example:

        {
            'scan': {
                'grade': 'A'
                ...
            },
            'tests': {
                'content-security-policy': {
                    'pass': True
                    ...
                }
            }
        }
    """
    # Always allow localhost scans when run in this way
    httpobs.conf.SCANNER_ALLOW_LOCALHOST = True

    # Attempt to retrieve all the resources, not capturing exceptions
    reqs = retrieve_all(hostname, **kwargs)

    # If we can't connect at all, let's abort the test
    if reqs['responses']['auto'] is None:
        return {'error': 'site down'}

    # Code based on httpobs.database.insert_test_results
    tests_failed = tests_passed = 0
    score_with_extra_credit = uncurved_score = 100
    results = {}

    for test in tests:
        # Get result for this test
        result = test(reqs)

        # Add the result with a score_description; the name becomes the key and leaves the result
        result['score_description'] = get_score_description(result['result'])
        results[result.pop('name')] = result

        # Keep track of how many tests passed or failed
        if result.get('pass'):
            tests_passed += 1
        else:
            tests_failed += 1

        # And keep track of the score. A result without a modifier counts as 0
        # instead of raising TypeError on `+= None`.
        score_modifier = result.get('score_modifier', 0)
        score_with_extra_credit += score_modifier

        # Only penalties count toward the uncurved score; bonuses are extra credit
        if score_modifier < 0:
            uncurved_score += score_modifier

    # Only record the full score if the uncurved score already receives an A
    if uncurved_score >= MINIMUM_SCORE_FOR_EXTRA_CREDIT:
        score = score_with_extra_credit
    else:
        score = uncurved_score

    # Get the score, grade, etc.
    score, grade, likelihood_indicator = get_grade_and_likelihood_for_score(score)

    # Return the results
    return {
        'scan': {
            'grade': grade,
            'likelihood_indicator': likelihood_indicator,
            'response_headers': dict(reqs['responses']['auto'].headers),
            'score': score,
            'tests_failed': tests_failed,
            'tests_passed': tests_passed,
            'tests_quantity': NUM_TESTS,
        },
        'tests': results,
    }