def setup_test():
    """Create the minimal fixture graph (user, account, list, report, scan, mail template) used by the mail tests."""
    user = User(first_name='test', last_name='test', username='******', is_active=True)
    user.save()

    account = Account(name='test')
    account.save()

    dashboarduser = DashboardUser(
        mail_preferred_mail_address='*****@*****.**',
        mail_preferred_language='nl',
        mail_send_mail_after_scan_finished=True,
        account=account,
        user=user,
    )
    dashboarduser.save()

    urllist = UrlList(name='', account=account)
    urllist.save()

    urllistreport = UrlListReport(
        urllist=urllist,
        average_internet_nl_score=42.42,
        at_when=timezone.now(),
    )
    urllistreport.save()

    internetnlv2scan = InternetNLV2Scan(type='web', scan_id='123', state='finished')
    internetnlv2scan.save()

    accountinternetnlscan = AccountInternetNLScan(
        account=account,
        scan=internetnlv2scan,
        urllist=urllist,
        started_on=timezone.now(),
        report=urllistreport,
        finished_on=timezone.now() + timedelta(hours=2),
    )
    accountinternetnlscan.save()

    # first template:
    template = EmailTemplate()
    template.name = "scan_finished_en"
    template.subject = "test"
    template.description = "test"
    template.email_html_text = "test {{report_average_internet_nl_score}}."
    template.save()
def test_urllistreport_get_previous_report(db):
    """Each report on a list chains to the chronologically previous report; the oldest has none."""
    account = Account(name='test')
    account.save()

    u = UrlList(name='', account=account)
    u.save()

    # Five reports, one per day, oldest first.
    reports = []
    for day in range(1, 6):
        report = UrlListReport(urllist=u, average_internet_nl_score=1, at_when=datetime(2020, 5, day))
        report.save()
        reports.append(report)

    # Walk newest-to-oldest: every report resolves to its predecessor.
    for newer, older in zip(reports[::-1], reports[-2::-1]):
        assert newer.get_previous_report_from_this_list() == older

    # The very first report has no predecessor.
    assert reports[0].get_previous_report_from_this_list() is None
def scan_urllist_now_ignoring_business_rules(urllist: UrlList):
    """Start a scan on this list immediately, skipping the usual scheduling/eligibility rules."""
    # Re-fetch by pk so we operate on a list that still exists in the database.
    found = UrlList.objects.all().filter(pk=urllist.id).first()
    if not found:
        return operation_response(error=True, message="List could not be found.")

    initialize_scan(found)

    # Record when the manual scan was requested.
    found.last_manual_scan = timezone.now()
    found.save()

    return operation_response(success=True, message="Scan started")
def get_or_create_list_by_name(account, name: str) -> UrlList:
    """Return the account's non-deleted url list with this name, creating a new one when absent.

    :param account: the Account owning the list.
    :param name: the list name to look up or create.
    :return: the existing or freshly created UrlList.
    """
    # `.all()` before `.filter()` is redundant; also use an early return instead of if/else.
    existing_list = UrlList.objects.filter(account=account, name=name, is_deleted=False).first()
    if existing_list:
        return existing_list

    urllist = UrlList(name=name, account=account)
    urllist.save()
    return urllist
def scan_urllist_now_ignoring_business_rules(urllist: UrlList):
    """Queue scan tasks for this list right away, bypassing the normal business rules."""
    # Make sure the list still exists before doing anything.
    fresh = UrlList.objects.all().filter(pk=urllist.id).first()
    if not fresh:
        return operation_response(error=True, message="List could not be found.")

    # Task creation raises ValueError when API credentials are missing/wrong.
    try:
        create_dashboard_scan_tasks(fresh).apply_async()
    except ValueError:
        return operation_response(
            error=True, message="Password to the internet.nl API is not set or incorrect.")

    fresh.last_manual_scan = timezone.now()
    fresh.save()

    return operation_response(success=True, message="Scan started")
def test_update_state(db):
    """State changes are logged once each; repeating the same state adds no extra log rows."""
    account = Account()
    account.save()

    urllist = UrlList(name='', account=account)
    urllist.save()

    scan = AccountInternetNLScan()
    scan.urllist = urllist
    scan.account = account
    scan.save()

    update_state("new", scan.id)

    # A situation has occurred where the log was already in the next step, but the scan state itself was
    # at the old step. This caused the scan to block: as the logic was that if the log is in the correct / new state
    # it was assumed the scan was also in that state. This is fixed and tested below.
    log_entry = AccountInternetNLScanLog()
    log_entry.scan = scan
    log_entry.at_when = timezone.now()
    log_entry.state = "out of sync"
    log_entry.save()

    update_state("out of sync", scan.id)
    stored_scan = AccountInternetNLScan.objects.all().first()
    assert stored_scan.state == "out of sync"

    # new, out of sync, out of sync
    # The last duplicate is stored to make it clear something went wrong. There is nothing 'attached' to the log
    # other than understanding the process in case of weird situations.
    assert AccountInternetNLScanLog.objects.all().count() == 3

    # make sure the amount of log info does not grow if things are the same
    for _ in range(3):
        update_state("out of sync", stored_scan.id)
    assert AccountInternetNLScanLog.objects.all().count() == 3
def create_list(account: Account, user_input: Dict) -> Dict[str, Any]:
    """Create a new url list for this account from user-supplied settings.

    Input values are validated/coerced rather than rejected: unrecognized values fall back to defaults.
    """
    required = [
        'id', 'name', 'enable_scans', 'scan_type', 'automated_scan_frequency',
        'scheduled_next_scan'
    ]
    # All keys must be present, no more and no less.
    if sorted(user_input.keys()) != sorted(required):
        return operation_response(error=True, message="Missing settings.")

    frequency = validate_list_automated_scan_frequency(user_input['automated_scan_frequency'])

    data = {
        'account': account,
        'name': validate_list_name(user_input['name']),
        'enable_scans': bool(user_input['enable_scans']),
        'scan_type': validate_list_scan_type(user_input['scan_type']),
        'automated_scan_frequency': frequency,
        'scheduled_next_scan': UrlList.determine_next_scan_moment(frequency),
    }

    new_list = UrlList(**data)
    new_list.save()

    # make sure the account is serializable.
    data['account'] = account.id
    # adding the ID makes it possible to add new urls to a new list.
    data['id'] = new_list.pk
    # give a hint if it can be scanned:
    data['scan_now_available'] = new_list.is_scan_now_available()

    return operation_response(success=True, message="List created.", data=data)
def retroactively_add_domains_and_endpoints_from_report(http_response, scan_type, account):
    """Rebuild a url list (with live endpoints) from an internet.nl report API response.

    :param http_response: parsed API response; domains are read from data.domains.
    :param scan_type: one of the keys of scan_type_to_protocol ('web', 'mail', 'mail_dashboard').
    :param account: the Account the new list is attached to.
    :return: the newly created UrlList (scans disabled).
    """
    log.debug(
        "Step 2: create list with urls from the report, with associated endpoints."
    )
    new_list = UrlList()
    new_list.name = http_response.get('data', {}).get('name', 'unnamed list')
    new_list.account = account
    new_list.scan_type = scan_type if scan_type == 'web' else 'mail'
    new_list.enable_scans = False
    new_list.save()

    log.debug("Step 3: add all urls from the report to the database.")
    domains = http_response.get('data', {}).get('domains', {})
    # comprehension instead of a manual append loop
    new_urls = [domain['domain'] for domain in domains]

    log.debug(
        "Step 4: Make sure that all these urls get an endpoint that matches the scan type."
    )
    # The endpoint must be alive.
    scan_type_to_protocol = {
        'mail': 'dns_mx_no_cname',
        'mail_dashboard': 'dns_soa',
        'web': 'dns_a_aaaa'
    }
    # Loop-invariant: the protocol only depends on scan_type, so resolve it once.
    protocol = scan_type_to_protocol[scan_type]
    for new_url in new_urls:
        existing_endpoint = Endpoint.objects.all().filter(
            protocol=protocol, url__url=new_url, is_dead=False).first()
        if not existing_endpoint:
            ep = Endpoint()
            # NOTE(review): this is None when the Url row does not exist yet — presumably guaranteed
            # to exist by an earlier step; TODO confirm against callers.
            ep.url = Url.objects.all().filter(url=new_url).first()
            ep.discovered_on = timezone.now()
            ep.port = 0
            ep.ip_version = 0
            ep.protocol = protocol
            ep.is_dead = False
            ep.save()

    log.debug(
        "Step 5: add all urls to a list, so a report on that list can be created."
    )
    debug_output = _add_to_urls_to_urllist(account, new_list, new_urls)
    log.info(debug_output)
    return new_list
def update_list_settings(account: Account, user_input: Dict) -> Dict[str, Any]:
    """
    This cannot update the urls, as that would increase complexity too much.

    :param account:
    :param user_input: {
        'id': int,
        'name': str,
        'enable_scans': bool,
        'scan_type': str,

        # todo: Who should set this? Should this be set by admins? How can we avoid permission hell?
        # Probably as long as the settings are not too detailed / too frequently.
        'automated_scan_frequency': str,
    }
    :return:
    """
    expected_keys = [
        'id', 'name', 'enable_scans', 'scan_type', 'automated_scan_frequency',
        'scheduled_next_scan'
    ]
    if check_keys(expected_keys, user_input):
        return operation_response(error=True, message="Missing settings.")

    # Prefetch the newest scan (with its related scan record) and the newest report,
    # so the response can be enriched without extra queries.
    prefetch_last_scan = Prefetch(
        'accountinternetnlscan_set',
        queryset=AccountInternetNLScan.objects.order_by('-id').select_related('scan'),
        to_attr='last_scan')

    last_report_prefetch = Prefetch(
        'urllistreport_set',
        # filter(pk=UrlListReport.objects.latest('id').pk).
        queryset=UrlListReport.objects.order_by('-id').only('id', 'at_when'),
        to_attr='last_report')

    urllist = UrlList.objects.all().filter(
        account=account, id=user_input['id'],
        is_deleted=False).prefetch_related(prefetch_last_scan, last_report_prefetch).first()

    if not urllist:
        return operation_response(error=True, message="No list of urls found.")

    # Yes, you can try and set any value. Values that are not recognized do not result in errors / error messages,
    # instead they will be overwritten with the default. This means less interaction with users / less annoyance over
    # errors on such simple forms.
    frequency = validate_list_automated_scan_frequency(user_input['automated_scan_frequency'])
    data = {
        'id': urllist.id,
        'account': account,
        'name': validate_list_name(user_input['name']),
        'enable_scans': bool(user_input['enable_scans']),
        'scan_type': validate_list_scan_type(user_input['scan_type']),
        'automated_scan_frequency': frequency,
        'scheduled_next_scan': UrlList.determine_next_scan_moment(frequency),
    }

    updated_urllist = UrlList(**data)
    updated_urllist.save()

    # make sure the account is serializable.
    data['account'] = account.id

    # inject the last scan information, defaulting to None when no scan/report exists yet.
    data['last_scan_id'] = None
    data['last_scan'] = None
    data['last_scan_finished'] = None
    if len(urllist.last_scan):
        newest = urllist.last_scan[0].scan
        data['last_scan_id'] = newest.id
        data['last_scan'] = newest.started_on.isoformat()
        data['last_scan_finished'] = newest.finished

    data['last_report_id'] = None
    data['last_report_date'] = None
    if len(urllist.last_report):
        data['last_report_id'] = urllist.last_report[0].id
        data['last_report_date'] = urllist.last_report[0].at_when

    data['scan_now_available'] = updated_urllist.is_scan_now_available()

    log.debug(data)

    return operation_response(success=True, message="Updated list settings", data=data)
def test_report_upgrade(db, monkeypatch) -> None:
    """Exercise the report-upgrade pipeline on a fixed fixture report, then verify that
    unscannable urls are retroactively added so a top-N list keeps its full length."""
    # Create urllist with a lot of unscannable domains, only apple.com is scannable.
    # megaupload.com will never be scannable, and the rest can have an endpoint and might be in the report
    # already because of this (but without endpoints)
    urls = ['akamaihd.net', 'apple.com', 'bp.blogspot.com', 'clickbank.net', 'cocolog-nifty.com', 'fda.gov',
            'geocities.jp', 'ggpht.com', 'googleusercontent.com', 'megaupload.com', 'nhk.or.jp',
            'ssl-images-amazon.com', 'ytimg.com']

    # create the list, code from test domain management:
    account, created = Account.objects.all().get_or_create(name="test")
    urllist = UrlList()
    urllist.name = "upgrade"
    urllist.account = account
    urllist.save()

    scan = AccountInternetNLScan()
    scan.urllist = urllist
    scan.account = account
    scan.save()

    # Every url becomes a saved Url row attached to the list.
    for url in urls:
        new_url = Url()
        new_url.url = url
        new_url.save()
        urllist.urls.add(new_url)
        urllist.save()

    # fake a report on these domains, without any upgrades, taken from the acc environment:
    fake_calculation = {
        "high": 19,
        "medium": 4,
        "low": 3,
        "ok": 15,
        "total_urls": 1,
        "high_urls": 1,
        "medium_urls": 0,
        "low_urls": 0,
        "ok_urls": 0,
        "explained_high": 0,
        "explained_medium": 0,
        "explained_low": 0,
        "explained_high_endpoints": 0,
        "explained_medium_endpoints": 0,
        "explained_low_endpoints": 0,
        "explained_high_urls": 0,
        "explained_medium_urls": 0,
        "explained_low_urls": 0,
        "explained_total_url_issues": 0,
        "explained_url_issues_high": 0,
        "explained_url_issues_medium": 0,
        "explained_url_issues_low": 0,
        "explained_total_endpoint_issues": 0,
        "explained_endpoint_issues_high": 0,
        "explained_endpoint_issues_medium": 0,
        "explained_endpoint_issues_low": 0,
        "total_endpoints": 1,
        "high_endpoints": 1,
        "medium_endpoints": 0,
        "low_endpoints": 0,
        "ok_endpoints": 0,
        "total_url_issues": 0,
        "total_endpoint_issues": 26,
        "url_issues_high": 0,
        "url_issues_medium": 0,
        "url_issues_low": 0,
        "endpoint_issues_high": 19,
        "endpoint_issues_medium": 4,
        "endpoint_issues_low": 3,
        "urls": [
            {
                "url": "apple.com",
                "ratings": [],
                "endpoints": [
                    {
                        "id": 4599,
                        "concat": "dns_a_aaaa/0 IPv0",
                        "ip": 0,
                        "ip_version": 0,
                        "port": 0,
                        "protocol": "dns_a_aaaa",
                        "v4": False,
                        "ratings": [
                            {
                                "type": "internet_nl_web_ipv6_ws_address",
                                "explanation": "Test internet_nl_web_ipv6_ws_address resulted in failed.",
                                "since": "2020-01-15T13:00:01.116013+00:00",
                                "last_scan": "2020-01-15T13:00:01.116689+00:00",
                                "high": 1,
                                "medium": 0,
                                "low": 0,
                                "ok": 0,
                                "not_testable": False,
                                "not_applicable": False,
                                "error_in_test": False,
                                "is_explained": False,
                                "comply_or_explain_explanation": "",
                                "comply_or_explain_explained_on": "",
                                "comply_or_explain_explanation_valid_until": "",
                                "comply_or_explain_valid_at_time_of_report": False,
                                "scan": 114575,
                                "scan_type": "internet_nl_web_ipv6_ws_address"
                            },
                            {
                                "type": "internet_nl_web_dnssec_valid",
                                "explanation": "Test internet_nl_web_dnssec_valid resulted in failed.",
                                "since": "2020-01-15T13:00:00.684906+00:00",
                                "last_scan": "2020-01-15T13:00:00.685193+00:00",
                                "high": 1,
                                "medium": 0,
                                "low": 0,
                                "ok": 0,
                                "not_testable": False,
                                "not_applicable": False,
                                "error_in_test": False,
                                "is_explained": False,
                                "comply_or_explain_explanation": "",
                                "comply_or_explain_explained_on": "",
                                "comply_or_explain_explanation_valid_until": "",
                                "comply_or_explain_valid_at_time_of_report": False,
                                "scan": 114556,
                                "scan_type": "internet_nl_web_dnssec_valid"
                            },
                        ],
                        "high": 19,
                        "medium": 4,
                        "low": 3,
                        "ok": 15,
                        "explained_high": 0,
                        "explained_medium": 0,
                        "explained_low": 0
                    }
                ],
                "total_issues": 26,
                "high": 19,
                "medium": 4,
                "low": 3,
                "ok": 15,
                "total_endpoints": 1,
                "high_endpoints": 1,
                "medium_endpoints": 0,
                "low_endpoints": 0,
                "ok_endpoints": 0,
                "total_url_issues": 0,
                "url_issues_high": 0,
                "url_issues_medium": 0,
                "url_issues_low": 0,
                "url_ok": 0,
                "total_endpoint_issues": 26,
                "endpoint_issues_high": 19,
                "endpoint_issues_medium": 4,
                "endpoint_issues_low": 3,
                "explained_total_issues": 0,
                "explained_high": 0,
                "explained_medium": 0,
                "explained_low": 0,
                "explained_high_endpoints": 0,
                "explained_medium_endpoints": 0,
                "explained_low_endpoints": 0,
                "explained_total_url_issues": 0,
                "explained_url_issues_high": 0,
                "explained_url_issues_medium": 0,
                "explained_url_issues_low": 0,
                "explained_total_endpoint_issues": 0,
                "explained_endpoint_issues_high": 0,
                "explained_endpoint_issues_medium": 0,
                "explained_endpoint_issues_low": 0
            }
        ],
        "total_issues": 26,
        "name": "Unscannable Web + one scannable"
    }

    fake_report = UrlListReport()
    fake_report.calculation = fake_calculation
    fake_report.urllist = urllist
    fake_report.at_when = timezone.now()
    fake_report.save()

    # First check if we are removing the comply_or_explain keys, mainly to save data:
    remove_comply_or_explain(fake_calculation)
    assert "explained_endpoint_issues_high" not in fake_calculation['urls'][0]
    assert "comply_or_explain_explanation" not in fake_calculation['urls'][0]['endpoints'][0]["ratings"][0]

    # Now add ratings based on keys, which makes direct access possible:
    add_keyed_ratings(fake_calculation)
    assert "ratings_by_type" in fake_calculation['urls'][0]['endpoints'][0]
    assert "internet_nl_web_ipv6_ws_address" in fake_calculation['urls'][0]['endpoints'][0]['ratings_by_type']

    # Add graph statistics, so the graphs can be instantly created based on report data
    add_statistics_over_ratings(fake_calculation)
    assert "statistics_per_issue_type" in fake_calculation
    assert "internet_nl_web_ipv6_ws_address" in fake_calculation["statistics_per_issue_type"]

    # todo: we can add some tests here to see if the aggregation is correct

    # add some statistics over all these metrics
    add_percentages_to_statistics(fake_calculation)
    assert "pct_ok" in fake_calculation["statistics_per_issue_type"]["internet_nl_web_ipv6_ws_address"]

    # and make sure the report is complete: meaning that all urls requested are present, even though they
    # could not be scanned. So a top 100 stays a top 100.
    assert (len(fake_calculation['urls']) == 1)
    upgrade_report_with_unscannable_urls(fake_report.id, scan.id)
    fake_report = UrlListReport.objects.all().first()
    assert(len(fake_report.calculation['urls']) == len(urls))

    # the first url should still be by apple:
    assert fake_report.calculation['urls'][0]['url'] == "apple.com"
def update_list_settings(account: Account, user_input: Dict) -> Dict[str, Any]:
    """
    This cannot update the urls, as that would increase complexity too much.

    :param account:
    :param user_input: {
        'id': int,
        'name': str,
        'enable_scans': bool,
        'scan_type': str,

        # todo: Who should set this? Should this be set by admins? How can we avoid permission hell?
        # Probably as long as the settings are not too detailed / too frequently.
        'automated_scan_frequency': str,
    }
    :return:
    """
    expected_keys = [
        'id', 'name', 'enable_scans', 'scan_type', 'automated_scan_frequency',
        'scheduled_next_scan'
    ]
    if not keys_are_present_in_object(expected_keys, user_input):
        return operation_response(error=True, message="Missing settings.")

    # Prefetch the newest scan (with related scan record) and the newest report so the
    # response can be enriched without extra queries.
    prefetch_last_scan = Prefetch(
        'accountinternetnlscan_set',
        queryset=AccountInternetNLScan.objects.order_by('-id').select_related(
            'scan'),
        to_attr='last_scan')

    last_report_prefetch = Prefetch(
        'urllistreport_set',
        # filter(pk=UrlListReport.objects.latest('id').pk).
        queryset=UrlListReport.objects.order_by('-id').only('id', 'at_when'),
        to_attr='last_report')

    urllist = UrlList.objects.all().filter(
        account=account, id=user_input['id'],
        is_deleted=False).annotate(num_urls=Count('urls')).prefetch_related(
            prefetch_last_scan, last_report_prefetch).first()

    if not urllist:
        return operation_response(error=True, message="No list of urls found.")

    # Yes, you can try and set any value. Values that are not recognized do not result in errors / error messages,
    # instead they will be overwritten with the default. This means less interaction with users / less annoyance over
    # errors on such simple forms.
    frequency = validate_list_automated_scan_frequency(
        user_input['automated_scan_frequency'])
    data = {
        'id': urllist.id,
        'account': account,
        'name': validate_list_name(user_input['name']),
        'enable_scans': bool(user_input['enable_scans']),
        'scan_type': validate_list_scan_type(user_input['scan_type']),
        'automated_scan_frequency': frequency,
        'scheduled_next_scan': determine_next_scan_moment(frequency),
    }

    updated_urllist = UrlList(**data)
    updated_urllist.save()

    # make sure the account is serializable, inject other data.
    data['account'] = account.id
    data['num_urls'] = urllist.num_urls
    data['last_scan_id'] = None
    data['last_scan_state'] = None
    data['last_scan'] = None
    data['last_scan_finished'] = None
    data['last_report_id'] = None
    data['last_report_date'] = None

    if urllist.last_scan:
        data['last_scan_id'] = urllist.last_scan[0].scan.id
        data['last_scan_state'] = urllist.last_scan[0].state
        data['last_scan'] = urllist.last_scan[0].started_on.isoformat()
        data['last_scan_finished'] = urllist.last_scan[0].state in [
            "finished", "cancelled"
        ]

    if urllist.last_report:
        data['last_report_id'] = urllist.last_report[0].id
        data['last_report_date'] = urllist.last_report[0].at_when

    data['scan_now_available'] = updated_urllist.is_scan_now_available()

    # list warnings (might do: make more generic, only if another list warning ever could occur.)
    list_warnings = []
    if urllist.num_urls > config.DASHBOARD_MAXIMUM_DOMAINS_PER_LIST:
        list_warnings.append('WARNING_DOMAINS_IN_LIST_EXCEED_MAXIMUM_ALLOWED')
    # Bug fix: the computed warnings were previously discarded (an empty list was assigned),
    # so the over-maximum warning never reached the frontend.
    data['list_warnings'] = list_warnings

    log.debug(data)

    # Sprinkling an activity stream action.
    action.send(account, verb='updated list', target=updated_urllist, public=False)

    return operation_response(success=True, message="Updated list settings", data=data)