Esempio n. 1
0
def scan(hostname: str, site_id: int, scan_id: int):
    """Run every analyzer test against ``hostname`` and record the outcome.

    The scan is first marked ``STATE_STARTED``, then the site's resources are
    fetched with ``retrieve_all`` and every callable listed in
    ``httpobs.scanner.analyzer.tests`` is run against them. Each result is
    stored with ``insert_test_result`` under the test function's name with
    underscores replaced by dashes.

    Args:
        hostname: host to scan; passed unchanged to ``retrieve_all``.
        site_id: site identifier forwarded to ``insert_test_result``.
        scan_id: identifier of the scan whose state is updated.

    Returns:
        None. Outcomes are reported through ``update_scan_state`` and
        ``insert_test_result``.
    """
    try:
        # Once celery kicks off the task, move the scan from PENDING to STARTED
        update_scan_state(scan_id, STATE_STARTED)

        # Attempt to retrieve all the resources
        reqs = retrieve_all(hostname)

        # No 'auto' response means we could not connect at all: fail the scan
        if reqs['responses']['auto'] is None:
            update_scan_state(scan_id, STATE_FAILED, error='site down')

            return

        # Execute each test; the stored test name is the function name with dashes
        for test in httpobs.scanner.analyzer.tests:
            # TODO: Get overridden expectations
            insert_test_result(site_id, scan_id, test.__name__.replace('_', '-'), test(reqs))

    # catch the celery timeout, which will almost certainly occur in retrieve_all()
    except (SoftTimeLimitExceeded, TimeLimitExceeded):
        update_scan_state(scan_id, STATE_ABORTED, error='site unresponsive')
    # Catch Exception rather than using a bare ``except:`` so that SystemExit
    # and KeyboardInterrupt still propagate and the process can shut down
    except Exception as e:
        # TODO: have more specific error messages

        # Dump diagnostics if we're in dev
        if 'HTTPOBS_DEV' in environ:
            import traceback
            print('Error detected in: ' + hostname)
            traceback.print_exc()

        # If we are unsuccessful, close out the scan in the database
        update_scan_state(scan_id, STATE_FAILED, error=repr(e))
    def test_retrieve_mozilla(self):
        """Check the structure and contents of ``retrieve_all('mozilla.org')``.

        Uses ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
        which no longer exists in Python 3.12+.
        """
        reqs = retrieve_all('mozilla.org')

        # Various things we know about mozilla.org
        self.assertIsNotNone(reqs['resources']['/'])
        self.assertIsNotNone(reqs['resources']['/contribute.json'])
        self.assertIsNotNone(reqs['resources']['/robots.txt'])
        self.assertIsNone(reqs['resources']['/clientaccesspolicy.xml'])
        self.assertIsNone(reqs['resources']['/crossdomain.xml'])

        # Every response slot holds a real response, backed by a session
        self.assertIsInstance(reqs['responses']['auto'], requests.Response)
        self.assertIsInstance(reqs['responses']['cors'], requests.Response)
        self.assertIsInstance(reqs['responses']['http'], requests.Response)
        self.assertIsInstance(reqs['responses']['https'], requests.Response)
        self.assertIsInstance(reqs['session'], requests.Session)

        self.assertEqual(reqs['hostname'], 'mozilla.org')

        # Only the first nine characters are compared, so any charset suffix
        # on the Content-Type header is ignored
        self.assertEqual(
            'text/html',
            reqs['responses']['auto'].headers['Content-Type'][0:9])
        # Two entries in the redirect history are expected before the final URL
        self.assertEqual(2, len(reqs['responses']['auto'].history))
        self.assertEqual(200, reqs['responses']['auto'].status_code)
        self.assertEqual('https://www.mozilla.org/en-US/',
                         reqs['responses']['auto'].url)
Esempio n. 3
0
def scan(hostname: str, site_id: int, scan_id: int):
    """Scan ``hostname`` with all analyzer tests and persist the results.

    Marks the scan ``STATE_STARTED``, fetches the site's resources through
    ``retrieve_all`` and runs each callable in
    ``httpobs.scanner.analyzer.tests``. Results are written with
    ``insert_test_result``, keyed by the test function's name with
    underscores turned into dashes.

    Args:
        hostname: host to scan; passed unchanged to ``retrieve_all``.
        site_id: site identifier forwarded to ``insert_test_result``.
        scan_id: identifier of the scan whose state is updated.

    Returns:
        None. Outcomes are reported through ``update_scan_state`` and
        ``insert_test_result``.
    """
    try:
        # Once celery kicks off the task, move the scan from PENDING to STARTED
        update_scan_state(scan_id, STATE_STARTED)

        # Attempt to retrieve all the resources
        reqs = retrieve_all(hostname)

        # No 'auto' response means we could not connect at all: fail the scan
        if reqs['responses']['auto'] is None:
            update_scan_state(scan_id, STATE_FAILED, error='site down')

            return

        # Execute each test; the stored test name is the function name with dashes
        for test in httpobs.scanner.analyzer.tests:
            # TODO: Get overridden expectations
            insert_test_result(site_id, scan_id,
                               test.__name__.replace('_', '-'), test(reqs))

    # catch the celery timeout, which will almost certainly occur in retrieve_all()
    except (SoftTimeLimitExceeded, TimeLimitExceeded):
        update_scan_state(scan_id, STATE_ABORTED, error='site unresponsive')
    # Catch Exception rather than using a bare ``except:`` so that SystemExit
    # and KeyboardInterrupt still propagate and the process can shut down
    except Exception as e:
        # TODO: have more specific error messages

        # Dump diagnostics if we're in dev
        if 'HTTPOBS_DEV' in environ:
            import traceback
            print('Error detected in: ' + hostname)
            traceback.print_exc()

        # If we are unsuccessful, close out the scan in the database
        update_scan_state(scan_id, STATE_FAILED, error=repr(e))
    def test_retrieve_non_existent_domain(self):
        """Check what ``retrieve_all`` returns for a randomly generated domain.

        Uses ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
        which no longer exists in Python 3.12+.
        """
        # 223 random lowercase letters followed by '.net'
        # NOTE(review): presumably long enough that the name can never resolve
        domain = ''.join(random.choice(string.ascii_lowercase) for _ in range(223)) + '.net'
        reqs = retrieve_all(domain)

        # Nothing could be fetched, so every response slot and the session are None
        self.assertIsNone(reqs['responses']['auto'])
        self.assertIsNone(reqs['responses']['cors'])
        self.assertIsNone(reqs['responses']['http'])
        self.assertIsNone(reqs['responses']['https'])
        self.assertIsNone(reqs['session'])

        # The hostname is still echoed back and no resources are recorded
        self.assertEqual(domain, reqs['hostname'])
        self.assertEqual({}, reqs['resources'])
Esempio n. 5
0
    def test_retrieve_non_existent_domain(self):
        """Verify the empty result ``retrieve_all`` gives for a random domain.

        ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
        was removed from ``unittest`` in Python 3.12.
        """
        # Build a hostname out of 223 random lowercase letters plus '.net'
        # NOTE(review): presumably long enough that the name can never resolve
        domain = ''.join(random.choice(string.ascii_lowercase) for _ in range(223)) + '.net'
        reqs = retrieve_all(domain)

        # No response of any kind, and no session, is expected
        self.assertIsNone(reqs['responses']['auto'])
        self.assertIsNone(reqs['responses']['cors'])
        self.assertIsNone(reqs['responses']['http'])
        self.assertIsNone(reqs['responses']['https'])
        self.assertIsNone(reqs['session'])

        # The requested hostname is reported back with an empty resource map
        self.assertEqual(domain, reqs['hostname'])
        self.assertEqual({}, reqs['resources'])
Esempio n. 6
0
def scan(hostname: str, site_id: int, scan_id: int):
    """Scan ``hostname`` and store all test results in a single call.

    The scan is marked ``STATE_RUNNING``, the cookies and headers configured
    for the site are loaded with ``select_site_headers`` and passed to
    ``retrieve_all``. Every callable in ``tests`` is then run, and the
    results are stored with ``insert_test_results`` together with the
    sanitized headers and status code of the 'auto' response.

    Args:
        hostname: host to scan; passed to ``select_site_headers`` and
            ``retrieve_all``.
        site_id: site identifier forwarded to ``insert_test_results``.
        scan_id: identifier of the scan whose state is updated.

    Returns:
        None. Outcomes are reported through ``update_scan_state`` and
        ``insert_test_results``.

    Raises:
        TimeLimitExceeded, WorkerLostError, WorkerShutdown, WorkerTerminate:
            re-raised untouched.
    """
    try:
        # Once celery kicks off the task, move the scan from PENDING to RUNNING
        update_scan_state(scan_id, STATE_RUNNING)

        # Get the site's cookies and headers
        headers = select_site_headers(hostname)

        # Attempt to retrieve all the resources
        reqs = retrieve_all(hostname, cookies=headers['cookies'], headers=headers['headers'])

        # No 'auto' response means we could not connect at all: fail the scan
        if reqs['responses']['auto'] is None:
            update_scan_state(scan_id, STATE_FAILED, error='site down')

            return

        # Execute each test and store all results in one go
        # TODO: Get overridden expectations
        insert_test_results(site_id,
                            scan_id,
                            [test(reqs) for test in tests],
                            sanitize_headers(reqs['responses']['auto'].headers),
                            reqs['responses']['auto'].status_code)

    # catch the celery timeout, which will almost certainly occur in retrieve_all()
    except SoftTimeLimitExceeded:
        update_scan_state(scan_id, STATE_ABORTED, error='site unresponsive')
    except (TimeLimitExceeded, WorkerLostError, WorkerShutdown, WorkerTerminate):
        raise
    # IOError is treated as the database being down: nothing can be recorded,
    # so only report it on stderr
    except IOError:
        print('database down, aborting scan on {hostname}'.format(hostname=hostname), file=sys.stderr)
    # Catch Exception rather than using a bare ``except:`` so that SystemExit
    # and KeyboardInterrupt are not swallowed and recorded as a failed scan
    except Exception as e:
        # TODO: have more specific error messages

        # If we are unsuccessful, close out the scan in the database
        update_scan_state(scan_id, STATE_FAILED, error=repr(e))

        # Print the exception to stderr if we're in dev; the header line goes
        # to stderr too so it stays next to the traceback it introduces
        if DEVELOPMENT_MODE:
            import traceback
            print('Error detected in scan for : ' + hostname, file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
    def test_retrieve_mozilla(self):
        """Check the structure and contents of ``retrieve_all('mozilla.org')``.

        Uses ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
        which no longer exists in Python 3.12+.
        """
        reqs = retrieve_all('mozilla.org')

        # Various things we know about mozilla.org
        self.assertIsNotNone(reqs['resources']['/'])
        self.assertIsNotNone(reqs['resources']['/contribute.json'])
        self.assertIsNotNone(reqs['resources']['/robots.txt'])
        self.assertIsNone(reqs['resources']['/clientaccesspolicy.xml'])
        self.assertIsNone(reqs['resources']['/crossdomain.xml'])

        # Every response slot holds a real response, backed by a session
        self.assertIsInstance(reqs['responses']['auto'], requests.Response)
        self.assertIsInstance(reqs['responses']['cors'], requests.Response)
        self.assertIsInstance(reqs['responses']['http'], requests.Response)
        self.assertIsInstance(reqs['responses']['https'], requests.Response)
        self.assertIsInstance(reqs['session'], requests.Session)

        self.assertEqual(reqs['hostname'], 'mozilla.org')

        # Only the first nine characters are compared, so any charset suffix
        # on the Content-Type header is ignored
        self.assertEqual('text/html', reqs['responses']['auto'].headers['Content-Type'][0:9])
        # Two entries in the redirect history are expected before the final URL
        self.assertEqual(2, len(reqs['responses']['auto'].history))
        self.assertEqual(200, reqs['responses']['auto'].status_code)
        self.assertEqual('https://www.mozilla.org/en-US/', reqs['responses']['auto'].url)
    def test_retrieve_invalid_cert(self):
        """The 'auto' response for expired.badssl.com must not be flagged verified."""
        auto_response = retrieve_all('expired.badssl.com')['responses']['auto']

        self.assertFalse(auto_response.verified)
Esempio n. 9
0
def scan(hostname, **kwargs):
    """Run an Observatory scan locally, with no database/redis behind it.

    Since this entry point is not a public API, security concerns are lower:
    arbitrary ports and paths may be scanned, and localhost scans are always
    enabled (``httpobs.conf.SCANNER_ALLOW_LOCALHOST`` is set to True as a
    side effect).

    Args:
        hostname (str): domain name for host to be scanned

    Kwargs (all forwarded to ``retrieve_all``):
        http_port (int): port to scan for HTTP, instead of 80
        https_port (int): port to be scanned for HTTPS, instead of 443
        path (str): path to scan, instead of "/"
        verify (bool): whether to enable or disable certificate verification,
            enabled by default. This can allow tested sites to pass the HSTS
            and HPKP tests, even with self-signed certificates.

        cookies (dict): Cookies sent to the system being scanned. Matches the
            requests cookie dict.
        headers (dict): HTTP headers sent to the system being scanned. Format
            matches the requests headers dict.

    Returns:
        ``{'error': 'site down'}`` when no 'auto' response could be obtained.
        Otherwise a dict representing the analyze (scan) and getScanResults
        (tests) API calls.  Example:

        {
            'scan': {
                'grade': 'A'
                ...
            },
            'tests': {
                'content-security-policy': {
                    'pass': True
                    ...
                }
            }
        }
    """
    # Scans started through this function may always target localhost
    httpobs.conf.SCANNER_ALLOW_LOCALHOST = True

    # Fetch everything up front; exceptions are deliberately left uncaught
    reqs = retrieve_all(hostname, **kwargs)

    # Without an 'auto' response there is nothing to analyze
    if reqs['responses']['auto'] is None:
        return {'error': 'site down'}

    # First run every test ...
    results = []
    for test in tests:
        results.append(test(reqs))

    # ... then attach a human-readable description to each outcome
    for entry in results:
        entry['score_description'] = get_score_description(entry['result'])

    # Score starts at 100 and is shifted by each test's modifier
    total_modifier = sum(entry.get('score_modifier', 0) for entry in results)
    graded = get_grade_and_likelihood_for_score(100 + total_modifier)
    tests_passed = sum(1 for entry in results if entry.get('pass'))

    # Summary section, mirroring the analyze API call
    summary = {
        'grade': graded[1],
        'likelihood_indicator': graded[2],
        'response_headers': dict(reqs['responses']['auto'].headers),
        'score': graded[0],
        'tests_failed': NUM_TESTS - tests_passed,
        'tests_passed': tests_passed,
        'tests_quantity': NUM_TESTS,
    }

    # Per-test section: the name moves out of each result and becomes its key
    tests_by_name = {}
    for entry in results:
        tests_by_name[entry.pop('name')] = entry

    return {'scan': summary, 'tests': tests_by_name}
Esempio n. 10
0
def scan(hostname, **kwargs):
    """Perform a stand-alone Observatory scan (no database/redis required).

    This is not a public API, so security concerns are lower: arbitrary ports
    and paths can be scanned, and localhost scans are always allowed
    (``httpobs.conf.SCANNER_ALLOW_LOCALHOST`` is set to True as a side
    effect).

    Args:
        hostname (str): domain name for host to be scanned. Must not include
            protocol (http://, https://) or port number (:80).

    Kwargs (all forwarded to ``retrieve_all``):
        http_port (int): port to scan for HTTP, instead of 80
        https_port (int): port to be scanned for HTTPS, instead of 443
        path (str): path to scan, instead of "/"
        verify (bool): whether to enable or disable certificate verification,
            enabled by default. This can allow tested sites to pass the HSTS
            and HPKP tests, even with self-signed certificates.

        cookies (dict): Cookies sent to the system being scanned. Matches the
            requests cookie dict.
        headers (dict): HTTP headers sent to the system being scanned. Format
            matches the requests headers dict.

    Returns:
        ``{'error': 'site down'}`` when no 'auto' response could be obtained.
        Otherwise a dict representing the analyze (scan) and getScanResults
        (tests) API calls.  Example:

        {
            'scan': {
                'grade': 'A'
                ...
            },
            'tests': {
                'content-security-policy': {
                    'pass': True
                    ...
                }
            }
        }
    """
    # Localhost is always a permitted target when scanning this way
    httpobs.conf.SCANNER_ALLOW_LOCALHOST = True

    # Retrieve all the resources; any exception propagates to the caller
    reqs = retrieve_all(hostname, **kwargs)

    # Bail out early if the site could not be reached at all
    if reqs['responses']['auto'] is None:
        return {'error': 'site down'}

    # Collect the raw outcome of every test before annotating any of them
    outcomes = []
    for run_test in tests:
        outcomes.append(run_test(reqs))

    for outcome in outcomes:
        description = get_score_description(outcome['result'])
        outcome['score_description'] = description

    # Score is 100 plus the sum of all modifiers (missing modifiers count as 0)
    modifier_sum = sum(outcome.get('score_modifier', 0) for outcome in outcomes)
    grade_info = get_grade_and_likelihood_for_score(100 + modifier_sum)
    tests_passed = sum(1 for outcome in outcomes if outcome.get('pass'))

    # Overall section of the report
    scan_section = {
        'grade': grade_info[1],
        'likelihood_indicator': grade_info[2],
        'response_headers': dict(reqs['responses']['auto'].headers),
        'score': grade_info[0],
        'tests_failed': NUM_TESTS - tests_passed,
        'tests_passed': tests_passed,
        'tests_quantity': NUM_TESTS,
    }

    # Individual results, keyed by the name popped out of each one
    tests_section = {}
    for outcome in outcomes:
        tests_section[outcome.pop('name')] = outcome

    return {'scan': scan_section, 'tests': tests_section}
Esempio n. 11
0
def scan(hostname, **kwargs):
    """Performs an Observatory scan, but doesn't require any database/redis
    backing. Given the lowered security concerns due to not being a public
    API, you can use this to scan arbitrary ports and paths.

    As a side effect, ``httpobs.conf.SCANNER_ALLOW_LOCALHOST`` is set to True.

    Scoring starts at 100. Negative score modifiers always apply; positive
    ones (extra credit) only count when the score from penalties alone is at
    least ``MINIMUM_SCORE_FOR_EXTRA_CREDIT``.

    Args:
        hostname (str): domain name for host to be scanned. Must not include
            protocol (http://, https://) or port number (:80).

    Kwargs (all forwarded to ``retrieve_all``):
        http_port (int): port to scan for HTTP, instead of 80
        https_port (int): port to be scanned for HTTPS, instead of 443
        path (str): path to scan, instead of "/"
        verify (bool): whether to enable or disable certificate verification,
            enabled by default. This can allow tested sites to pass the HSTS
            and HPKP tests, even with self-signed certificates.

        cookies (dict): Cookies sent to the system being scanned. Matches the
            requests cookie dict.
        headers (dict): HTTP headers sent to the system being scanned. Format
            matches the requests headers dict.

    Returns:
        ``{'error': 'site down'}`` when no 'auto' response could be obtained.
        Otherwise a dict representing the analyze (scan) and getScanResults
        (tests) API calls.  Example:

        {
            'scan': {
                'grade': 'A'
                ...
            },
            'tests': {
                'content-security-policy': {
                    'pass': True
                    ...
                }
            }
        }
    """
    # Always allow localhost scans when run in this way
    httpobs.conf.SCANNER_ALLOW_LOCALHOST = True

    # Attempt to retrieve all the resources, not capturing exceptions
    reqs = retrieve_all(hostname, **kwargs)

    # If we can't connect at all, let's abort the test
    if reqs['responses']['auto'] is None:
        return {'error': 'site down'}

    # Code based on httpobs.database.insert_test_results
    tests_failed = tests_passed = 0
    score_with_extra_credit = uncurved_score = 100
    results = {}

    for test in tests:
        # Get result for this test
        result = test(reqs)

        # Add the result with a score_description
        result['score_description'] = get_score_description(result['result'])
        results[result.pop('name')] = result

        # Keep track of how many tests passed or failed
        if result.get('pass'):
            tests_passed += 1
        else:
            tests_failed += 1

        # And keep track of the score. Default to 0 so a result without a
        # 'score_modifier' key leaves the score unchanged instead of raising
        # a TypeError on ``None``
        score_modifier = result.get('score_modifier', 0)
        score_with_extra_credit += score_modifier
        if score_modifier < 0:
            uncurved_score += score_modifier

    # Only record the full score if the uncurved score already receives an A
    score = score_with_extra_credit if uncurved_score >= MINIMUM_SCORE_FOR_EXTRA_CREDIT else uncurved_score

    # Get the score, grade, etc.
    score, grade, likelihood_indicator = get_grade_and_likelihood_for_score(
        score)

    # Return the results
    return ({
        'scan': {
            'grade': grade,
            'likelihood_indicator': likelihood_indicator,
            'response_headers': dict(reqs['responses']['auto'].headers),
            'score': score,
            'tests_failed': tests_failed,
            'tests_passed': tests_passed,
            'tests_quantity': NUM_TESTS,
        },
        'tests': results
    })
    def test_retrieve_invalid_cert(self):
        """Scanning expired.badssl.com must yield an 'auto' response that is not verified."""
        responses = retrieve_all('expired.badssl.com')['responses']
        is_verified = responses['auto'].verified

        self.assertFalse(is_verified)