Example #1
def remove_comply_or_explain(report: UrlListReport):
    # Also remove all comply or explain information as it costs a lot of data/memory on the client

    for url in report.calculation['urls']:

        if "explained_total_issues" not in url:
            # explanations have already been removed.
            continue

        del url["explained_total_issues"]
        del url["explained_high"]
        del url["explained_medium"]
        del url["explained_low"]
        del url["explained_high_endpoints"]
        del url["explained_medium_endpoints"]
        del url["explained_low_endpoints"]
        del url["explained_total_url_issues"]
        del url["explained_url_issues_high"]
        del url["explained_url_issues_medium"]
        del url["explained_url_issues_low"]
        del url["explained_total_endpoint_issues"]
        del url["explained_endpoint_issues_high"]
        del url["explained_endpoint_issues_medium"]
        del url["explained_endpoint_issues_low"]

        for endpoint in url['endpoints']:
            del endpoint['explained_high']
            del endpoint['explained_medium']
            del endpoint['explained_low']

            for rating in endpoint['ratings']:
                del rating['is_explained']
                del rating['comply_or_explain_explanation']
                del rating['comply_or_explain_explained_on']
                del rating['comply_or_explain_explanation_valid_until']
                del rating['comply_or_explain_valid_at_time_of_report']

    if "explained_high" not in report.calculation:
        report.save()
        return

    del report.calculation["explained_high"]
    del report.calculation["explained_medium"]
    del report.calculation["explained_low"]
    del report.calculation["explained_high_endpoints"]
    del report.calculation["explained_medium_endpoints"]
    del report.calculation["explained_low_endpoints"]
    del report.calculation["explained_high_urls"]
    del report.calculation["explained_medium_urls"]
    del report.calculation["explained_low_urls"]
    del report.calculation["explained_total_url_issues"]
    del report.calculation["explained_url_issues_high"]
    del report.calculation["explained_url_issues_medium"]
    del report.calculation["explained_url_issues_low"]
    del report.calculation["explained_total_endpoint_issues"]
    del report.calculation["explained_endpoint_issues_high"]
    del report.calculation["explained_endpoint_issues_medium"]
    del report.calculation["explained_endpoint_issues_low"]

    report.save()
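
These report-rewriting helpers are typically applied once to every stored report, e.g. from a migration or a management command. A hedged sketch of such a driver loop; the queryset and the use of iterator() are assumptions, not taken from the repository:

for stored_report in UrlListReport.objects.all().iterator():
    # deletes all explained_* fields from the calculation and saves the report
    remove_comply_or_explain(stored_report)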
Example #2
def add_statistics_over_ratings(report: UrlListReport):
    # only works after the ratings have been keyed by type (add_keyed_ratings).
    # todo: in report section, move statistics_per_issue_type to calculation

    report.calculation['statistics_per_issue_type'] = {}

    possible_issues = []

    for url in report.calculation['urls']:
        for endpoint in url['endpoints']:
            possible_issues += endpoint['ratings_by_type'].keys()
    possible_issues = set(possible_issues)

    # prepare the stats dict to have less expensive operations in the 3x nested loop
    for issue in possible_issues:
        # todo: could be a defaultdict. although explicit initialization is somewhat useful.
        report.calculation['statistics_per_issue_type'][issue] = {
            'high': 0,
            'medium': 0,
            'low': 0,
            'ok': 0,
            'not_ok': 0,
            'not_testable': 0,
            'not_applicable': 0,
            'error_in_test': 0
        }

    # count the numbers, can we do this with some map/add function that is way faster?
    for issue in possible_issues:
        for url in report.calculation['urls']:
            for endpoint in url['endpoints']:
                rating = endpoint['ratings_by_type'].get(issue, None)
                if not rating:
                    continue
                report.calculation['statistics_per_issue_type'][issue][
                    'high'] += rating['high']
                report.calculation['statistics_per_issue_type'][issue][
                    'medium'] += rating['medium']
                report.calculation['statistics_per_issue_type'][issue][
                    'low'] += rating['low']
                report.calculation['statistics_per_issue_type'][issue][
                    'not_testable'] += rating['not_testable']
                report.calculation['statistics_per_issue_type'][issue][
                    'not_applicable'] += rating['not_applicable']
                report.calculation['statistics_per_issue_type'][issue][
                    'error_in_test'] += rating['error_in_test']

                # things that are not_testable or not_applicable have no impact on things being OK
                # see: https://github.com/internetstandards/Internet.nl-dashboard/issues/68
                if not any([
                        rating['not_testable'], rating['not_applicable'],
                        rating['error_in_test']
                ]):
                    report.calculation['statistics_per_issue_type'][issue][
                        'ok'] += rating['ok']
                    # these can be summed because only one of high, med, low is 1
                    report.calculation['statistics_per_issue_type'][issue]['not_ok'] += \
                        rating['high'] + rating['medium'] + rating['low']

    report.save()
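
The result is one counter dict per rating type. A minimal sketch of the shape this adds to report.calculation (the issue name is one that actually occurs in these reports, the numbers are made up):

statistics_per_issue_type_example = {
    'internet_nl_web_ipv6_ws_address': {
        'high': 3, 'medium': 1, 'low': 0,              # summed over all endpoints
        'ok': 12, 'not_ok': 4,                         # only counted when the metric was testable
        'not_testable': 1, 'not_applicable': 0, 'error_in_test': 0,
    },
    # ... one such entry per rating type found in ratings_by_type ...
}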
Example #3
def setup_test():
    user = User(
        **{
            'first_name': 'test',
            'last_name': 'test',
            'username': '******',
            'is_active': True
        })
    user.save()

    account = Account(**{'name': 'test'})
    account.save()

    dashboarduser = DashboardUser(
        **{
            'mail_preferred_mail_address': '*****@*****.**',
            'mail_preferred_language': 'nl',
            'mail_send_mail_after_scan_finished': True,
            'account': account,
            'user': user
        })
    dashboarduser.save()

    urllist = UrlList(**{'name': '', 'account': account})
    urllist.save()

    urllistreport = UrlListReport(
        **{
            'urllist': urllist,
            'average_internet_nl_score': 42.42,
            'at_when': timezone.now()
        })
    urllistreport.save()

    internetnlv2scan = InternetNLV2Scan(**{
        'type': 'web',
        'scan_id': '123',
        'state': 'finished'
    })
    internetnlv2scan.save()

    accountinternetnlscan = AccountInternetNLScan(
        **{
            'account': account,
            'scan': internetnlv2scan,
            'urllist': urllist,
            'started_on': timezone.now(),
            'report': urllistreport,
            'finished_on': timezone.now() + timedelta(hours=2)
        })
    accountinternetnlscan.save()

    # first template:
    template = EmailTemplate()
    template.name = "scan_finished_en"
    template.subject = "test"
    template.description = "test"
    template.email_html_text = "test {{report_average_internet_nl_score}}."
    template.save()
def rate_urllist_on_moment(urllist: UrlList,
                           when: datetime = None,
                           prevent_duplicates: bool = True):
    # If there is no time slicing, then it's today.
    if not when:
        when = datetime.now(pytz.utc)

    log.info("Creating report for urllist %s on %s" % (
        urllist,
        when,
    ))

    if UrlListReport.objects.all().filter(urllist=urllist,
                                          at_when=when).exists():
        log.debug(
            "UrllistReport already exists for %s on %s. Not overwriting." %
            (urllist, when))
        return

    urls = relevant_urls_at_timepoint_urllist(urllist=urllist, when=when)
    all_url_ratings = get_latest_urlratings_fast(urls, when)
    calculation = aggegrate_url_rating_scores(
        all_url_ratings,
        only_include_issues=urllist_report_content[urllist.scan_type])

    try:
        last = UrlListReport.objects.filter(
            urllist=urllist, at_when__lte=when).latest('at_when')
    except UrlListReport.DoesNotExist:
        last = UrlListReport()  # create a dummy one for comparison

    calculation['name'] = urllist.name

    if prevent_duplicates:
        if not DeepDiff(last.calculation,
                        calculation,
                        ignore_order=True,
                        report_repetition=True):
            log.warning(
                "The report for %s on %s is the same as the report from %s. Not saving."
                % (urllist, when, last.at_when))
            return

    log.info(
        "The calculation for %s on %s has changed, so we're saving this rating."
        % (urllist, when))

    # remove urls and name from scores object, so it can be used as initialization parameters (saves lines)
    # this is by reference, meaning that the calculation will be affected if we don't work on a clone.
    init_scores = deepcopy(calculation)
    del (init_scores['name'])
    del (init_scores['urls'])

    report = UrlListReport(**init_scores)
    report.urllist = urllist
    report.at_when = when
    report.average_internet_nl_score = sum_internet_nl_scores_over_rating(
        calculation)
    report.calculation = calculation
    report.save()
Example #5
def split_score_and_url(report: UrlListReport):
    """
    Split the internet.nl score and the url to be instantly accessible.

    :param report:
    :return:
    """
    for url in report.calculation['urls']:
        for endpoint in url['endpoints']:
            score = 0
            score_url = ""  # local name; avoids shadowing the outer loop variable 'url'
            scan = 0
            since = ""
            last_scan = ""
            for rating in endpoint['ratings']:
                if rating['type'] in [
                        "internet_nl_web_overall_score",
                        "internet_nl_mail_dashboard_overall_score"
                ]:
                    # explanation: "78 https://batch.interne…zuiderzeeland.nl/886818/"
                    explanation = rating['explanation'].split(" ")
                    rating['internet_nl_score'] = score = int(explanation[0])
                    rating['internet_nl_url'] = score_url = explanation[1]
                    scan = rating['scan']
                    since = rating['since']
                    last_scan = rating['last_scan']

            # Now that we have seen all ratings, add a single value for the score, so we don't have to switch
            # between web or mail, which is severely annoying.
            # There is only one overall score rating per endpoint, so this is safe.
            endpoint['ratings'].append({
                "type": "internet_nl_score",
                "scan_type": "internet_nl_score",
                "internet_nl_score": score,
                "internet_nl_url": url,

                # to comply with the rating structure
                "high": 0,
                "medium":
                1,  # make sure to match simple verdicts as defined above.
                "low": 0,
                "ok": 0,
                "not_testable": False,
                "not_applicable": False,
                "error_in_test": False,
                'test_result': score,
                "scan": scan,
                "since": since,
                "last_scan": last_scan,
                "explanation": "",
            })

    report.save()
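
The overall-score rating stores its value as a space-separated explanation string, which is what the split above relies on. A tiny sketch of that parsing with a made-up explanation value:

explanation = "78 https://batch.internet.nl/example/123456/".split(" ")
internet_nl_score = int(explanation[0])    # 78
internet_nl_url = explanation[1]           # the accompanying url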
Example #6
def add_simple_verdicts(report: UrlListReport):
    """
    Reduces the rating fields to a single string value, so the correct rating can be retrieved instantly.

    // these are in ranges of 10's so at later moments some values can be added in between.
    // these are used to compare these ratings without having to convert them in javascript dynamically
    Simple values match the current possible {'passed': 400, 'info': 300, 'warning': 200, 'failed': 100};

    :param report:
    :return:
    """

    for url in report.calculation['urls']:
        for endpoint in url['endpoints']:
            for rating in endpoint['ratings']:
                if rating['high']:
                    rating['simple_verdict'] = "failed"
                    rating['simple_progression'] = 100
                    continue
                if rating['medium']:
                    rating['simple_verdict'] = "warning"
                    rating['simple_progression'] = 200
                    continue
                if rating['low']:
                    rating['simple_verdict'] = "info"
                    rating['simple_progression'] = 300
                    continue
                if rating['ok'] and (not rating['not_applicable']
                                     and not rating['not_testable']):
                    rating['simple_verdict'] = "passed"
                    rating['simple_progression'] = 400
                    continue
                if rating['not_applicable']:
                    rating['simple_verdict'] = "not_applicable"
                    rating['simple_progression'] = 0
                    continue
                if rating['not_testable']:
                    rating['simple_verdict'] = "not_testable"
                    rating['simple_progression'] = 0
                    continue

                # no verdicts === undefined / unknown. We should always have a rating in an endpoint,
                #  otherwise something terrible has happened. Note that split_score_and_url will add some
                # things that do not fit this.
                raise ArithmeticError(
                    f"Missing any sort of verdict for this rating on this endpoint {endpoint['id']}."
                )

    report.save()
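
Per the docstring, simple_progression exists so two ratings can be compared without translating verdict strings client-side. A small sketch of such a comparison (both rating dicts are made up):

previous = {"simple_verdict": "warning", "simple_progression": 200}
current = {"simple_verdict": "passed", "simple_progression": 400}
if current["simple_progression"] > previous["simple_progression"]:
    print("improved")   # 400 > 200: this metric moved from warning to passed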
Example #7
def add_keyed_ratings(report: UrlListReport):
    """
    This makes issues directly accessible by key, instead of iterating over a list to find them.
    This is, of course, much faster when showing a report. Issues are never duplicated anyway, so not doing this was
    probably a design omission.

    :param report:
    :return:
    """

    for url in report.calculation['urls']:
        for endpoint in url['endpoints']:
            endpoint['ratings_by_type'] = {}
            for rating in endpoint['ratings']:
                endpoint['ratings_by_type'][rating['type']] = rating

    report.save()
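
After this transformation a specific metric can be read directly instead of scanning the ratings list. A sketch, assuming the endpoint dict contains an internet_nl_web_dnssec_valid rating:

# before: linear search through the list of ratings
dnssec = next(r for r in endpoint['ratings'] if r['type'] == 'internet_nl_web_dnssec_valid')
# after: direct keyed access
dnssec = endpoint['ratings_by_type']['internet_nl_web_dnssec_valid']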
Example #8
def clean_up_not_required_data_to_speed_up_report_on_client(
        report: UrlListReport):
    """
    Loading JSON objects in the client takes (a lot of) time: the larger the object, the more time.
    Especially with 500+ urls, shaving off data increases parse speed by over 50%. So this is a must.

    :param report:
    :return:
    """

    for url in report.calculation['urls']:
        for endpoint in url['endpoints']:
            for rating_key in endpoint['ratings_by_type']:
                # clean up fields we don't need, to make the report show even quicker
                # a lot of stuff from web sec map is nice, but not really useful for us at this moment.
                # perhaps later

                # These values are used in add_statistics_over_ratings; only 'ok' is used in the spreadsheet
                # export (which could also be pre-generated).
                del endpoint['ratings_by_type'][rating_key][
                    'high']  # high is used in add_statistics_over_ratings
                del endpoint['ratings_by_type'][rating_key][
                    'medium']  # only 'ok' is used in spreadsheet export.
                del endpoint['ratings_by_type'][rating_key][
                    'low']  # only 'ok' is used in spreadsheet export.
                del endpoint['ratings_by_type'][rating_key][
                    'not_testable']  # only 'ok' is used in spreadsheet export.
                del endpoint['ratings_by_type'][rating_key][
                    'not_applicable']  # only 'ok' is used in spreadsheet export
                del endpoint['ratings_by_type'][rating_key][
                    'error_in_test']  # only 'ok' is used in spreadsheet export
                del endpoint['ratings_by_type'][rating_key]['since']
                del endpoint['ratings_by_type'][rating_key]['last_scan']
                del endpoint['ratings_by_type'][rating_key]['explanation']

                del endpoint['ratings_by_type'][rating_key][
                    'type']  # is already in the key
                del endpoint['ratings_by_type'][rating_key][
                    'scan_type']  # is already in the key

            # remove the original rating, as that slows parsing on the client down significantly.
            # with significantly == Vue will parse it, and for a 500 url list this will take 5 seconds.
            endpoint['ratings'] = []

    report.save()
Example #9
def add_simple_verdicts(report: UrlListReport):
    """
    # Todo: this value is already available, and more accurately, from the API. So use the value that got returned
    # from the API instead.

    Reduces the rating fields to a single string value, so the correct rating can be retrieved instantly.

    // these are in ranges of 10's so at later moments some values can be added in between.
    // these are used to compare these ratings without having to convert them in javascript dynamically
    Simple values match the current possible {'passed': 400, 'info': 300, 'warning': 200, 'failed': 100};

    :param report:
    :return:
    """

    # <50 will not be compared
    progression_table = {
        'not_applicable': 0,
        'not_testable': 0,
        'error_in_test': 0,
        'no_mx': 0,
        'unreachable': 0,
        'failed': 100,
        'warning': 200,
        'info': 300,
        'good_not_tested': 380,
        'passed': 400,
    }

    for url in report.calculation['urls']:
        for endpoint in url['endpoints']:
            for rating in endpoint['ratings']:
                rating['simple_progression'] = progression_table.get(
                    rating.get('test_result', ''), 0)

    report.save()
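
Because the lookup uses dict.get with a default of 0, ratings whose test_result is missing or unknown simply fall into the "<50, will not be compared" range. A quick illustration using the progression_table above:

progression_table.get('passed', 0)             # 400
progression_table.get('good_not_tested', 0)    # 380
progression_table.get('', 0)                   # 0, rating without a test_result
progression_table.get('brand_new_verdict', 0)  # 0, unknown verdicts are not compared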
Example #10
def add_percentages_to_statistics(report: UrlListReport):

    for key, value in report.calculation['statistics_per_issue_type'].items():
        issue = report.calculation['statistics_per_issue_type'][key]

        # may 2020: we want to see the other issues in the graphs as being gray.
        graphs_all = sum([
            issue['ok'], issue['high'], issue['medium'], issue['low'],
            issue['not_testable'], issue['not_applicable'],
            issue['error_in_test']
        ])
        if graphs_all == 0:
            # This happens when everything tested is not applicable or not testable: thus no stats:
            report.calculation['statistics_per_issue_type'][key][
                'pct_high'] = 0
            report.calculation['statistics_per_issue_type'][key][
                'pct_medium'] = 0
            report.calculation['statistics_per_issue_type'][key]['pct_low'] = 0
            report.calculation['statistics_per_issue_type'][key]['pct_ok'] = 0
            report.calculation['statistics_per_issue_type'][key][
                'pct_not_ok'] = 0
            continue

        tcskp = report.calculation['statistics_per_issue_type'][key]
        tcskp['pct_high'] = round((issue['high'] / graphs_all) * 100, 2)
        tcskp['pct_medium'] = round((issue['medium'] / graphs_all) * 100, 2)
        tcskp['pct_low'] = round((issue['low'] / graphs_all) * 100, 2)
        # all other possible stuff. Note that no_mx and such have been mapped to one of these.
        tcskp['pct_not_applicable'] = round(
            (issue['not_applicable'] / graphs_all) * 100, 2)
        tcskp['pct_not_testable'] = round(
            (issue['not_testable'] / graphs_all) * 100, 2)
        tcskp['pct_error_in_test'] = round(
            (issue['error_in_test'] / graphs_all) * 100, 2)

        # May 2019 warning (=medium) and info(=low) do NOT have a score impact, only high has a score impact.
        # https://www.internet.nl/faqs/report/
        # This has been altered in May 2020 to avoid confusion and show different kinds of values, it's now just OK
        # instead of including medium and low as ok.
        tcskp['pct_ok'] = round(((issue['ok']) / graphs_all) * 100, 2)
        tcskp['pct_not_ok'] = round((issue['not_ok'] / graphs_all) * 100, 2)

    report.save()
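
A worked example of the percentage math above, with made-up counts (ok=6, high=2, medium=1, low=1, not_testable=2, not_applicable=0, error_in_test=0):

graphs_all = 6 + 2 + 1 + 1 + 2 + 0 + 0                # 12
pct_high = round((2 / graphs_all) * 100, 2)           # 16.67
pct_ok = round((6 / graphs_all) * 100, 2)             # 50.0
pct_not_testable = round((2 / graphs_all) * 100, 2)   # 16.67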
def upgrade_report_with_unscannable_urls(urllistreport: UrlListReport,
                                         scan: AccountInternetNLScan):
    """
    Urls that cannot be scanned using the internet.nl website must not be scanned. This is where endpoint
    detection comes into view: only domains with valid endpoints are (or should be) scanned. Other domains have to
    be ignored.

    Yet, when we publish a list of "top 500" domains, only 499 show up in the report. This is due to a number of
    complications.

    1: some domains show up for which it is stated that the requirements for scanning were not met. Yet, somehow,
    such a domain is in the report while it shouldn't be. This seems to be a bug in the reporting engine (todo) that
    tries to retrieve all results, and if the domain has another endpoint, it is added to the report (albeit empty).
    These empty domains are accounted for, and are displayed correctly in the report as being ignored.

    2: some domains do not have any endpoints at all, such as megaupload.com. These should not be scanned either.
    These domains should, however, be reflected in the report, the same as the domains that have a single endpoint.

    To account for these issues, after report generation an extra step is needed that upgrades the report. (There
    already is report upgrading code.) The upgrade will check if all domains are in the report, and if not, add
    the url as being empty. This way all urls that are requested are in the report, and if they are empty, they
    are ignored in all statistics.

    :param urllistreport:
    :param scan:
    :return:
    """

    # See if all urls in the list are also mentioned in the report, if not, add them and also make sure the stats
    # for the report are correct(!). This means all unscannable domains _will_ be in the report, as that matches
    # the list of domains to scan.

    urls_in_report: List[str] = [
        url['url'] for url in urllistreport.calculation['urls']
    ]
    urls_in_list: List[Url] = list(scan.urllist.urls.all())
    urls_not_in_report = [
        url.url for url in urls_in_list if url.url not in urls_in_report
    ]

    # An empty url looks like this:
    empty_url_template = {
        "url": "",
        "ratings": [],
        "endpoints": [],
        "total_issues": 0,
        "high": 0,
        "medium": 0,
        "low": 0,
        "ok": 0,
        "total_endpoints": 0,
        "high_endpoints": 0,
        "medium_endpoints": 0,
        "low_endpoints": 0,
        "ok_endpoints": 0,
        "total_url_issues": 0,
        "url_issues_high": 0,
        "url_issues_medium": 0,
        "url_issues_low": 0,
        "url_ok": 0,
        "total_endpoint_issues": 0,
        "endpoint_issues_high": 0,
        "endpoint_issues_medium": 0,
        "endpoint_issues_low": 0,
    }

    for url_not_in_report in urls_not_in_report:
        # Copy the template, otherwise all instances will point to the same text (the last domain in the list of
        # missing domains).
        tmp_empty_url_template = copy(empty_url_template)
        tmp_empty_url_template['url'] = url_not_in_report
        urllistreport.calculation['urls'].append(tmp_empty_url_template)

    # also update the total urls, as that can be influenced:
    urllistreport.calculation['total_urls'] = len(
        urllistreport.calculation['urls'])
    urllistreport.total_urls = len(urllistreport.calculation['urls'])
    urllistreport.save()

    return urllistreport
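
The copy() in the loop above matters: appending the template dict itself would make every added url entry an alias of the same object, so each assignment to 'url' would overwrite the previous one. A minimal demonstration of that pitfall:

from copy import copy

template = {"url": "", "endpoints": []}
aliased = [template, template]              # both list items are the same dict object
aliased[0]["url"] = "example.nl"
print(aliased[1]["url"])                    # "example.nl": the second entry changed as well

template = {"url": "", "endpoints": []}
copied = [copy(template), copy(template)]   # independent (shallow) copies
copied[0]["url"] = "example.nl"
print(copied[1]["url"])                     # "": the second entry is untouched

A shallow copy is enough here because only top-level scalar fields of the template are replaced.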
Example #12
def add_percentages_to_statistics(report: UrlListReport):

    for key, value in report.calculation['statistics_per_issue_type'].items():
        issue = report.calculation['statistics_per_issue_type'][key]

        all = issue['ok'] + issue['not_ok']
        if all == 0:
            # This happens when everything tested is not applicable or not testable: thus no stats:
            report.calculation['statistics_per_issue_type'][key][
                'pct_high'] = 0
            report.calculation['statistics_per_issue_type'][key][
                'pct_medium'] = 0
            report.calculation['statistics_per_issue_type'][key]['pct_low'] = 0
            report.calculation['statistics_per_issue_type'][key]['pct_ok'] = 0
            report.calculation['statistics_per_issue_type'][key][
                'pct_not_ok'] = 0
            continue

        report.calculation['statistics_per_issue_type'][key][
            'pct_high'] = round((issue['high'] / all) * 100, 2)
        report.calculation['statistics_per_issue_type'][key][
            'pct_medium'] = round((issue['medium'] / all) * 100, 2)
        report.calculation['statistics_per_issue_type'][key][
            'pct_low'] = round((issue['low'] / all) * 100, 2)

        # warning (=medium) and info(=low) do NOT have a score impact, only high has a score impact.
        # https://www.internet.nl/faqs/report/
        report.calculation['statistics_per_issue_type'][key]['pct_ok'] = round(
            ((issue['ok'] + issue['low'] + issue['medium']) / all) * 100, 2)

        report.calculation['statistics_per_issue_type'][key][
            'pct_not_ok'] = round((issue['not_ok'] / all) * 100, 2)

        # The internet_nl_web_appsecpriv category is labelled as high, probably for some reason (could not find it
        # quickly), but it is a medium category, which means the pct_ok should _always_ be 100.
        # So in this special case we overwrite pct_ok with 100%, even though it's lower:
        if key == "internet_nl_web_appsecpriv":
            report.calculation['statistics_per_issue_type'][key][
                'pct_ok'] = 100

    report.save()
Example #13
def test_report_upgrade(db, monkeypatch) -> None:
    # Create urllist with a lot of unscannable domains, only apple.com is scannable.
    # megaupload.com will never be scannable, and the rest can have an endpoint and might be in the report
    # already because of this (but without endpoints)

    urls = ['akamaihd.net', 'apple.com', 'bp.blogspot.com', 'clickbank.net', 'cocolog-nifty.com', 'fda.gov',
            'geocities.jp', 'ggpht.com', 'googleusercontent.com', 'megaupload.com', 'nhk.or.jp',
            'ssl-images-amazon.com', 'ytimg.com']

    # create the list, code from test domain management:
    account, created = Account.objects.all().get_or_create(name="test")
    urllist = UrlList()
    urllist.name = "upgrade"
    urllist.account = account
    urllist.save()

    scan = AccountInternetNLScan()
    scan.urllist = urllist
    scan.account = account
    scan.save()

    for url in urls:
        new_url = Url()
        new_url.url = url
        new_url.save()
        urllist.urls.add(new_url)
        urllist.save()

    # fake a report on these domains, without any upgrades, taken from the acc environment:
    fake_calculation = {
        "high": 19,
        "medium": 4,
        "low": 3,
        "ok": 15,
        "total_urls": 1,
        "high_urls": 1,
        "medium_urls": 0,
        "low_urls": 0,
        "ok_urls": 0,
        "explained_high": 0,
        "explained_medium": 0,
        "explained_low": 0,
        "explained_high_endpoints": 0,
        "explained_medium_endpoints": 0,
        "explained_low_endpoints": 0,
        "explained_high_urls": 0,
        "explained_medium_urls": 0,
        "explained_low_urls": 0,
        "explained_total_url_issues": 0,
        "explained_url_issues_high": 0,
        "explained_url_issues_medium": 0,
        "explained_url_issues_low": 0,
        "explained_total_endpoint_issues": 0,
        "explained_endpoint_issues_high": 0,
        "explained_endpoint_issues_medium": 0,
        "explained_endpoint_issues_low": 0,
        "total_endpoints": 1,
        "high_endpoints": 1,
        "medium_endpoints": 0,
        "low_endpoints": 0,
        "ok_endpoints": 0,
        "total_url_issues": 0,
        "total_endpoint_issues": 26,
        "url_issues_high": 0,
        "url_issues_medium": 0,
        "url_issues_low": 0,
        "endpoint_issues_high": 19,
        "endpoint_issues_medium": 4,
        "endpoint_issues_low": 3,
        "urls": [
            {
                "url": "apple.com",
                "ratings": [],
                "endpoints": [
                    {
                        "id": 4599,
                        "concat": "dns_a_aaaa/0 IPv0",
                        "ip": 0,
                        "ip_version": 0,
                        "port": 0,
                        "protocol": "dns_a_aaaa",
                        "v4": False,
                        "ratings": [
                            {
                                "type": "internet_nl_web_ipv6_ws_address",
                                "explanation": "Test internet_nl_web_ipv6_ws_address resulted in failed.",
                                "since": "2020-01-15T13:00:01.116013+00:00",
                                "last_scan": "2020-01-15T13:00:01.116689+00:00",
                                "high": 1,
                                "medium": 0,
                                "low": 0,
                                "ok": 0,
                                "not_testable": False,
                                "not_applicable": False,
                                "error_in_test": False,
                                "is_explained": False,
                                "comply_or_explain_explanation": "",
                                "comply_or_explain_explained_on": "",
                                "comply_or_explain_explanation_valid_until": "",
                                "comply_or_explain_valid_at_time_of_report": False,
                                "scan": 114575,
                                "scan_type": "internet_nl_web_ipv6_ws_address"
                            },
                            {
                                "type": "internet_nl_web_dnssec_valid",
                                "explanation": "Test internet_nl_web_dnssec_valid resulted in failed.",
                                "since": "2020-01-15T13:00:00.684906+00:00",
                                "last_scan": "2020-01-15T13:00:00.685193+00:00",
                                "high": 1,
                                "medium": 0,
                                "low": 0,
                                "ok": 0,
                                "not_testable": False,
                                "not_applicable": False,
                                "error_in_test": False,
                                "is_explained": False,
                                "comply_or_explain_explanation": "",
                                "comply_or_explain_explained_on": "",
                                "comply_or_explain_explanation_valid_until": "",
                                "comply_or_explain_valid_at_time_of_report": False,
                                "scan": 114556,
                                "scan_type": "internet_nl_web_dnssec_valid"
                            },
                        ],
                        "high": 19,
                        "medium": 4,
                        "low": 3,
                        "ok": 15,
                        "explained_high": 0,
                        "explained_medium": 0,
                        "explained_low": 0
                    }
                ],
                "total_issues": 26,
                "high": 19,
                "medium": 4,
                "low": 3,
                "ok": 15,
                "total_endpoints": 1,
                "high_endpoints": 1,
                "medium_endpoints": 0,
                "low_endpoints": 0,
                "ok_endpoints": 0,
                "total_url_issues": 0,
                "url_issues_high": 0,
                "url_issues_medium": 0,
                "url_issues_low": 0,
                "url_ok": 0,
                "total_endpoint_issues": 26,
                "endpoint_issues_high": 19,
                "endpoint_issues_medium": 4,
                "endpoint_issues_low": 3,
                "explained_total_issues": 0,
                "explained_high": 0,
                "explained_medium": 0,
                "explained_low": 0,
                "explained_high_endpoints": 0,
                "explained_medium_endpoints": 0,
                "explained_low_endpoints": 0,
                "explained_total_url_issues": 0,
                "explained_url_issues_high": 0,
                "explained_url_issues_medium": 0,
                "explained_url_issues_low": 0,
                "explained_total_endpoint_issues": 0,
                "explained_endpoint_issues_high": 0,
                "explained_endpoint_issues_medium": 0,
                "explained_endpoint_issues_low": 0
            }
        ],
        "total_issues": 26,
        "name": "Unscannable Web + one scannable"
    }

    fake_report = UrlListReport()
    fake_report.calculation = fake_calculation
    fake_report.urllist = urllist
    fake_report.at_when = timezone.now()
    fake_report.save()

    # First check if we are removing the comply_or_explain keys, mainly to save data:
    remove_comply_or_explain(fake_calculation)
    assert "explained_endpoint_issues_high" not in fake_calculation['urls'][0]
    assert "comply_or_explain_explanation" not in fake_calculation['urls'][0]['endpoints'][0]["ratings"][0]

    # Now add ratings based on keys, which makes direct access possible:
    add_keyed_ratings(fake_calculation)
    assert "ratings_by_type" in fake_calculation['urls'][0]['endpoints'][0]
    assert "internet_nl_web_ipv6_ws_address" in fake_calculation['urls'][0]['endpoints'][0]['ratings_by_type']

    # Add graph statistics, so the graphs can be instantly created based on report data
    add_statistics_over_ratings(fake_calculation)
    assert "statistics_per_issue_type" in fake_calculation
    assert "internet_nl_web_ipv6_ws_address" in fake_calculation["statistics_per_issue_type"]
    # todo: we can add some tests here to see if the aggregation is correct

    # add some statistics over all these metrics
    add_percentages_to_statistics(fake_calculation)

    assert "pct_ok" in fake_calculation["statistics_per_issue_type"]["internet_nl_web_ipv6_ws_address"]

    # and make sure the report is complete: meaning that all urls requested are present, even though they
    # could not be scanned. So a top 100 stays a top 100.
    assert (len(fake_calculation['urls']) == 1)
    upgrade_report_with_unscannable_urls(fake_report.id, scan.id)
    fake_report = UrlListReport.objects.all().first()
    assert(len(fake_report.calculation['urls']) == len(urls))

    # the first url should still be by apple:
    assert fake_report.calculation['urls'][0]['url'] == "apple.com"
def rate_urllist_on_moment(urllist: UrlList, when: datetime = None, prevent_duplicates: bool = True):
    """
    :param urllist:
    :param when: A moment in time of which data should be aggregated
    :param prevent_duplicates: If the last report had the same data, don't save a new report but return the last report
    instead.
    :return: UrlListReport
    """
    # If there is no time slicing, then it's today.
    if not when:
        when = datetime.now(pytz.utc)

    log.info("Creating report for urllist %s on %s" % (urllist, when, ))

    if UrlListReport.objects.all().filter(urllist=urllist, at_when=when).exists():
        log.debug("UrllistReport already exists for %s on %s. Not overwriting." % (urllist, when))
        existing_report = UrlListReport.objects.all().filter(urllist=urllist, at_when=when).first()
        return existing_report

    urls = relevant_urls_at_timepoint_urllist(urllist=urllist, when=when)
    all_url_ratings = get_latest_urlratings_fast(urls, when)

    # Clean the url_ratings to only include the content we need: only the relevant issues (the rest is removed)
    # and only the relevant endpoint types.
    for urlrating in all_url_ratings:
        calculation = remove_issues_from_calculation(urlrating.calculation, urllist_report_content[urllist.scan_type])

        # Some endpoint types use the same ratings, such as dns_soa and dns_mx... This means that not
        # all endpoints will be removed for internet.nl. We need the following endpoints per scan:
        # -> note: urllist stores web/mail, they mean: web and mail_dashboard.
        endpoint_types_per_scan = {"web": "dns_a_aaaa", "mail": "dns_soa"}
        calculation = only_include_endpoint_protocols(calculation, [endpoint_types_per_scan[urllist.scan_type]])

        # This already overrides endpoint statistics, use the calculation you get from this.
        calculation, amount_of_issues = statistics_over_url_calculation(calculation)
        # overwrite the rest of the statistics.
        calculation = add_statistics_to_calculation(calculation, amount_of_issues)

        urlrating.calculation = calculation

    calculation = aggegrate_url_rating_scores(all_url_ratings)

    try:
        last = UrlListReport.objects.filter(urllist=urllist, at_when__lte=when).latest('at_when')
    except UrlListReport.DoesNotExist:
        last = UrlListReport()  # create a dummy one for comparison

    calculation['name'] = urllist.name

    if prevent_duplicates:
        if not DeepDiff(last.calculation, calculation, ignore_order=True, report_repetition=True):
            log.warning("The report for %s on %s is the same as the report from %s. Not saving." % (
                urllist, when, last.at_when))
            return last

    log.info("The calculation for %s on %s has changed, so we're saving this rating." % (urllist, when))

    # remove urls and name from scores object, so it can be used as initialization parameters (saves lines)
    # this is by reference, meaning that the calculation will be affected if we don't work on a clone.
    init_scores = deepcopy(calculation)
    del(init_scores['name'])
    del(init_scores['urls'])

    report = UrlListReport(**init_scores)
    report.urllist = urllist
    report.at_when = when
    report.average_internet_nl_score = sum_internet_nl_scores_over_rating(calculation)
    report.calculation = calculation
    report.save()
    return report
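
The comment above explains why endpoints are filtered per scan type (web uses dns_a_aaaa, mail uses dns_soa). A hypothetical stand-in for only_include_endpoint_protocols, assuming it simply drops endpoints whose protocol is not on the allow-list; the real helper may do more:

def only_include_endpoint_protocols_sketch(calculation: dict, allowed_protocols: list) -> dict:
    # Keep, per url, only the endpoints whose protocol matches the scan type.
    for url in calculation.get('urls', []):
        url['endpoints'] = [ep for ep in url['endpoints'] if ep.get('protocol') in allowed_protocols]
    return calculation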
Example #15
def test_urllistreport_get_previous_report(db):
    account = Account(**{'name': 'test'})
    account.save()

    u = UrlList(**{'name': '', 'account': account})
    u.save()

    urllistreport1 = UrlListReport(
        **{
            'urllist': u,
            'average_internet_nl_score': 1,
            'at_when': datetime(2020, 5, 1)
        })
    urllistreport1.save()

    urllistreport2 = UrlListReport(
        **{
            'urllist': u,
            'average_internet_nl_score': 1,
            'at_when': datetime(2020, 5, 2)
        })
    urllistreport2.save()

    urllistreport3 = UrlListReport(
        **{
            'urllist': u,
            'average_internet_nl_score': 1,
            'at_when': datetime(2020, 5, 3)
        })
    urllistreport3.save()

    urllistreport4 = UrlListReport(
        **{
            'urllist': u,
            'average_internet_nl_score': 1,
            'at_when': datetime(2020, 5, 4)
        })
    urllistreport4.save()

    urllistreport5 = UrlListReport(
        **{
            'urllist': u,
            'average_internet_nl_score': 1,
            'at_when': datetime(2020, 5, 5)
        })
    urllistreport5.save()

    assert urllistreport5.get_previous_report_from_this_list(
    ) == urllistreport4
    assert urllistreport4.get_previous_report_from_this_list(
    ) == urllistreport3
    assert urllistreport3.get_previous_report_from_this_list(
    ) == urllistreport2
    assert urllistreport2.get_previous_report_from_this_list(
    ) == urllistreport1
    assert urllistreport1.get_previous_report_from_this_list() is None
def rate_urllist_on_moment(urllist: UrlList,
                           when: datetime = None,
                           prevent_duplicates: bool = True,
                           scan_type: str = "web") -> int:
    """
    :param urllist:
    :param when: A moment in time of which data should be aggregated
    :param prevent_duplicates: If the last report had the same data, don't save a new report but return the last report
    instead.
    :return: UrlListReport id
    """
    # If there is no time slicing, then it's today.
    if not when:
        when = datetime.now(pytz.utc)

    log.info(f"Creating report for urllist {urllist} on {when}")

    if UrlListReport.objects.all().filter(urllist=urllist,
                                          at_when=when).exists():
        log.debug(
            f"UrllistReport already exists for {urllist} on {when}. Not overwriting."
        )
        existing_report = UrlListReport.objects.all().filter(
            urllist=urllist, at_when=when).first()
        return int(existing_report.id)

    urls = relevant_urls_at_timepoint_urllist(urllist=urllist, when=when)
    log.debug(f'Found {len(urls)} urls to be relevant at this moment.')

    calculation = create_calculation_on_urls(urls, when, scan_type=scan_type)

    try:
        last = UrlListReport.objects.filter(
            urllist=urllist, at_when__lte=when).latest('at_when')
    except UrlListReport.DoesNotExist:
        last = UrlListReport()  # create a dummy one for comparison

    calculation['name'] = urllist.name

    if prevent_duplicates:
        if not DeepDiff(last.calculation,
                        calculation,
                        ignore_order=True,
                        report_repetition=True):
            log.info(
                f"The report for {urllist} on {when} is the same as the report from {last.at_when}. Not saving."
            )
            return int(last.id)

    log.info(
        f"The calculation for {urllist} on {when} has changed, so we're saving this rating."
    )

    # remove urls and name from scores object, so it can be used as initialization parameters (saves lines)
    # this is by reference, meaning that the calculation will be affected if we don't work on a clone.
    init_scores = deepcopy(calculation)
    del init_scores['name']
    del init_scores['urls']

    external_scan_type = {
        "web": "web",
        "mail": "mail",
        "mail_dashboard": "mail"
    }
    report = UrlListReport(**init_scores)
    report.urllist = urllist
    report.report_type = external_scan_type[scan_type]
    report.at_when = when
    report.average_internet_nl_score = sum_internet_nl_scores_over_rating(
        calculation)
    report.calculation = calculation
    report.save()
    return int(report.id)
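
Since this version returns the report id rather than the object, a caller would re-fetch the report when it needs the data; a hedged sketch assuming a previously created UrlList instance and standard Django ORM access:

report_id = rate_urllist_on_moment(urllist, scan_type="web")
report = UrlListReport.objects.get(id=report_id)
print(report.report_type, report.average_internet_nl_score)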