async def test_before_request_reject_request_to_path_not_matching_filter_in_whitelist(
            self):
        """A path-only whitelist rejects a URL whose path is not under it."""
        url_filter = FilterRequestFromURL(allowed_urls="/allowed")

        with self.assertRaises(RejectRequest):
            # "/test/index.html" is not under the whitelisted "/allowed".
            entry = Entry.create("http://example.com/test/index.html")
            await url_filter.before_request(entry)
    async def test_before_request_reject_request_to_host_not_matching_filter_in_whitelist(
            self):
        """A host whitelist rejects a URL on a different (sub)domain."""
        url_filter = FilterRequestFromURL(allowed_urls="example.com")

        with self.assertRaises(RejectRequest):
            # "www.example.com" is not the whitelisted "example.com".
            entry = Entry.create("http://www.example.com/")
            await url_filter.before_request(entry)
    async def test_before_request_reject_request_to_path_and_host_matching_filter_in_blacklist(
            self):
        """A blacklist entry rejects a URL matching both its host and path."""
        url_filter = FilterRequestFromURL(
            forbidden_urls="example.com/forbidden")

        with self.assertRaises(RejectRequest):
            entry = Entry.create("http://example.com/forbidden/index.html")
            await url_filter.before_request(entry)
    async def test_before_request_allow_request_to_path_or_host_not_matching_filter_in_blacklist(
            self):
        """A blacklist entry lets through URLs that differ in host or in path."""
        url_filter = FilterRequestFromURL(
            forbidden_urls="example.com/forbidden")

        accepted_urls = (
            # Same path, different host.
            "http://example.ca/forbidden/index.html",
            # Same host, different path.
            "http://example.com/test/index.html",
        )
        # Neither call is expected to raise.
        for url in accepted_urls:
            await url_filter.before_request(Entry.create(url))
Esempio n. 5
0
def setup_hammertime_heuristics(hammertime,
                                *,
                                user_agent=default_user_agent,
                                vhost=None):
    """Register the scanner's heuristics on ``hammertime``.

    Builds the heuristic objects, adds them to ``hammertime.heuristics`` in a
    fixed order, wires the child heuristics of the heuristics that have them,
    and finally sets the ``User-Agent`` and ``Host`` headers.

    Side effect: rebinds the module-level ``heuristics_with_child`` list.

    Args:
        hammertime: Object whose ``heuristics`` attribute exposes ``add`` and
            ``add_multiple``.
        user_agent: Value passed for the ``User-Agent`` header.
        vhost: When not ``None``, used as the ``Host`` header and allowed by
            the URL filter alongside ``conf.target_host``; otherwise only
            ``conf.target_host`` is used.
    """
    #  TODO Make sure rejecting 404 does not conflict with tomcat fake 404 detection.
    global heuristics_with_child
    dead_host_detection = DeadHostDetection(threshold=200)
    soft_404_detector = DetectSoft404(distance_threshold=6)
    redirect_follower = FollowRedirects()
    heuristics_with_child = [
        RejectCatchAllRedirect(),
        redirect_follower,
        RejectIgnoredQuery(),
    ]

    # Hosts accepted by the URL filter: the target host, plus the vhost if set.
    if vhost is None:
        allowed_hosts = conf.target_host
    else:
        allowed_hosts = (vhost, conf.target_host)

    shared_heuristics = [
        RejectStatusCode({404, 406, 502, 503}),
        DynamicTimeout(1.0, 5),
        RedirectLimiter(),
        FilterRequestFromURL(allowed_urls=allowed_hosts),
        IgnoreLargeBody(initial_limit=initial_limit),
    ]
    content_heuristics = [
        StripTag('input'),
        StripTag('script'),
        soft_404_detector,
        RejectSoft404(),
        MatchString(),
        DetectBehaviorChange(buffer_size=100),
        LogBehaviorChange(),
    ]

    registry = hammertime.heuristics

    # Dead host detection must be first to make sure there is no skipped after_headers
    registry.add(dead_host_detection)
    registry.add_multiple(shared_heuristics)

    # Make sure follow redirect comes in before soft404
    registry.add_multiple(heuristics_with_child)
    registry.add_multiple(content_heuristics)

    # Every heuristic owning children also gets the shared heuristics.
    for parent in heuristics_with_child:
        parent.child_heuristics.add_multiple(shared_heuristics)

    soft_404_children = soft_404_detector.child_heuristics
    soft_404_children.add(StripTag('input'))
    soft_404_children.add(StripTag('script'))
    soft_404_children.add(dead_host_detection)
    soft_404_children.add(redirect_follower)

    add_http_header(hammertime, "User-Agent", user_agent)
    host_header = conf.target_host if vhost is None else vhost
    add_http_header(hammertime, "Host", host_header)
    async def test_before_request_accept_list_of_url_for_whitelist(self):
        """A whitelist given as a list accepts exactly the listed hosts."""
        whitelist = ["example.com", "www.example.com", "test.example.com"]
        url_filter = FilterRequestFromURL(allowed_urls=whitelist)

        accepted = (
            "http://example.com/",
            "https://www.example.com/index.php",
            "http://test.example.com/index.html",
        )
        rejected = (
            "http://abc.example.com/",
            "https://example.ca/",
            "http://www.example.com.net/",
        )

        # Hosts outside the whitelist must each be rejected...
        for rejected_url in rejected:
            with self.assertRaises(RejectRequest):
                await url_filter.before_request(Entry.create(rejected_url))
        # ...while the listed hosts go through without raising.
        for accepted_url in accepted:
            await url_filter.before_request(Entry.create(accepted_url))
    async def test_before_request_allow_request_to_path_and_host_matching_filter_in_whitelist(
            self):
        """A host+path whitelist entry accepts a URL located under it."""
        url_filter = FilterRequestFromURL(allowed_urls="example.com/allowed")

        # Expected not to raise.
        entry = Entry.create("http://example.com/allowed/index.html")
        await url_filter.before_request(entry)
 def test_constructor_raise_value_error_if_both_domain_list_are_set(self):
     """Passing both a whitelist and a blacklist raises ``ValueError``."""
     conflicting_lists = {
         "allowed_urls": "example.com",
         "forbidden_urls": "test.com",
     }
     with self.assertRaises(ValueError):
         FilterRequestFromURL(**conflicting_lists)
 def test_constructor_raise_value_error_if_both_domain_list_are_none(self):
     """Passing neither a whitelist nor a blacklist raises ``ValueError``."""
     missing_lists = {"allowed_urls": None, "forbidden_urls": None}
     with self.assertRaises(ValueError):
         FilterRequestFromURL(**missing_lists)
    async def test_path_matching_only_apply_to_full_directory_name(self):
        """A whitelisted path does not match a longer name sharing its prefix."""
        url_filter = FilterRequestFromURL(allowed_urls="example.com/allowed")

        with self.assertRaises(RejectRequest):
            # "/allowed-test" merely starts with "/allowed".
            entry = Entry.create("http://example.com/allowed-test")
            await url_filter.before_request(entry)
Esempio n. 11
0
def setup_hammertime_heuristics(hammertime,
                                *,
                                user_agent=default_user_agent,
                                vhost=None,
                                confirmation_factor=1,
                                har_output_dir=None):
    """Register the scanner's heuristics on ``hammertime``.

    Builds the heuristic objects, adds them to ``hammertime.heuristics`` in a
    fixed order, and wires the child heuristics of the heuristics that have
    them. Optionally appends a HAR-storing heuristic.

    Side effect: rebinds the module-level ``heuristics_with_child`` list.

    Args:
        hammertime: Object whose ``heuristics`` attribute exposes ``add`` and
            ``add_multiple``.
        user_agent: Value given to the ``User-Agent`` ``SetHeader`` heuristic.
        vhost: When not ``None``, used as the ``Host`` header value and
            allowed by the URL filter alongside ``conf.target_host``;
            otherwise only ``conf.target_host`` is used.
        confirmation_factor: Forwarded to ``DetectSoft404``.
        har_output_dir: When not ``None``, a ``StoreHAR`` heuristic writing
            through ``FileWriter(har_output_dir)`` is added last.
    """
    global heuristics_with_child
    dead_host_detection = DeadHostDetection(threshold=200)
    soft_404_detector = DetectSoft404(distance_threshold=6,
                                      confirmation_factor=confirmation_factor)
    redirect_follower = FollowRedirects()
    heuristics_with_child = [
        RejectCatchAllRedirect(),
        redirect_follower,
        RejectIgnoredQuery(),
    ]

    # Hosts accepted by the URL filter: the target host, plus the vhost if set.
    if vhost is None:
        allowed_hosts = conf.target_host
    else:
        allowed_hosts = (vhost, conf.target_host)

    init_heuristics = [
        SetHeader("User-Agent", user_agent),
        SetHeader("Host", conf.target_host if vhost is None else vhost),
        ContentHashSampling(),
        ContentSampling(),
        ContentSimhashSampling(),
        dead_host_detection,
        RejectStatusCode({503, 508}, exception_class=StopRequest),
        StripTag('input'),
        StripTag('script'),
    ]

    global_heuristics = [
        RejectStatusCode({404, 406, 502}),
        RejectWebApplicationFirewall(),
        DynamicTimeout(1.0, 5),
        RedirectLimiter(),
        FilterRequestFromURL(allowed_urls=allowed_hosts),
        IgnoreLargeBody(initial_limit=initial_limit),
    ]

    # ValidateEntry appears twice on purpose here: the list is kept exactly
    # as it was (once after MatchString, once at the very end).
    detection_heuristics = [
        soft_404_detector,
        MatchString(),
        ValidateEntry(),
        DetectBehaviorChange(buffer_size=100),
        LogBehaviorChange(),
        ValidateEntry(),
    ]

    registry = hammertime.heuristics

    # The init heuristics (which include dead host detection) are registered
    # ahead of every other heuristic.
    registry.add_multiple(init_heuristics)

    # General
    registry.add_multiple(global_heuristics)
    registry.add_multiple(heuristics_with_child)
    registry.add_multiple(detection_heuristics)

    soft_404_children = soft_404_detector.child_heuristics
    soft_404_children.add_multiple(init_heuristics)
    soft_404_children.add_multiple(heuristics_with_child)

    # Each heuristic owning children gets the init and global heuristics.
    for parent in heuristics_with_child:
        parent.child_heuristics.add_multiple(init_heuristics)
        parent.child_heuristics.add_multiple(global_heuristics)

    if har_output_dir is None:
        return

    # Imported only when HAR output is requested.
    from tachyon.har import StoreHAR, FileWriter
    registry.add(StoreHAR(writer=FileWriter(har_output_dir)))