def get_lastest_map_health_data(country: str = "NL", organization_type: str = "municipality") -> Dict[str, Any]:
    empty_report = {
        "outdate_period_in_hours": -1,
        "percentage_up_to_date": -1,
        "percentage_out_of_date": -1,
        "amount_up_to_date": -1,
        "amount_out_of_date": -1,
        "per_scan": [],
    }

    map_configuration = (
        Configuration.objects.all()
        .filter(country=get_country(country), organization_type=get_organization_type(organization_type))
        .first()
    )
    if not map_configuration:
        return empty_report

    try:
        mh = MapHealthReport.objects.all().filter(map_configuration=map_configuration).latest("at_when")
    except MapHealthReport.DoesNotExist:
        return empty_report

    return mh.detailed_report


def get_top_fail_data(country: str = "NL", organization_type="municipality", weeks_back=0):
    when = get_when(weeks_back)

    cursor = connection.cursor()

    sql = """
        SELECT
            low,
            organization.name,
            organizations_organizationtype.name,
            organization.id,
            at_when,
            organization.twitter_handle,
            high,
            medium,
            low,
            total_urls,
            total_endpoints
        FROM map_organizationreport
        INNER JOIN
            organization on organization.id = map_organizationreport.organization_id
        INNER JOIN
            organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN
            coordinate ON coordinate.organization_id = organization.id
        INNER JOIN
            (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as org2 ON org2.id = or2.organization_id
                WHERE
                    at_when <= '%(when)s'
                    AND org2.country = '%(country)s'
                    AND org2.type_id = '%(OrganizationTypeId)s'
                GROUP BY organization_id
            ) as stacked_organization_report
            ON stacked_organization_report.id2 = map_organizationreport.id
        WHERE
            (
                ('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
                 AND organization.is_dead = 1)
                OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0)
            )
            AND organization.type_id = '%(OrganizationTypeId)s'
            AND organization.country = '%(country)s'
            AND total_urls > 0
        GROUP BY organization.name
        HAVING high > 0 or medium > 0
        ORDER BY high DESC, medium DESC, organization.name ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()

    return rows_to_dataset(rows, when)


def get_organization_report_by_name(
    country: str = DEFAULT_COUNTRY, organization_type=DEFAULT_LAYER, organization_name=None, weeks_back=0
):
    organization = Organization.objects.filter(
        computed_name_slug=slugify(organization_name),
        country=get_country(country),
        type=get_organization_type(organization_type),
        is_dead=False,
    ).first()

    log.debug("- %s %s %s " % (organization_name, get_country(country), get_organization_type(organization_type)))
    log.debug(organization)

    if not organization:
        return {}

    return get_organization_report_by_id(organization.pk, weeks_back)


def export_coordinates(country: str = "NL", organization_type="municipality"):
    organizations = Organization.objects.all().filter(
        country=get_country(country), type=get_organization_type(organization_type)
    )

    columns = ["id", "organization", "geojsontype", "area"]

    query = (
        Coordinate.objects.all()
        .filter(organization__in=list(organizations), is_dead=False)
        .values_list("id", "organization", "geojsontype", "area")
    )

    return generic_export(query, columns)


def export_urls(country: str = "NL", organization_type="municipality"):
    organizations = Organization.objects.all().filter(
        country=get_country(country), type=get_organization_type(organization_type)
    )

    columns = ["id", "url", "organization"]

    query = (
        Url.objects.all()
        .filter(organization__in=list(organizations), is_dead=False, not_resolvable=False)
        .values_list("id", "url", "organization")
    )

    return generic_export(query, columns)


def get_2ndlevel_domains(country, layer):
    """
    A starting point for SIDN to upload information about domains: this returns the list of all
    second-level domains for a layer. Data for these domains is uploaded separately.
    """
    urls = (
        Url.objects.all()
        .filter(
            Q(computed_subdomain__isnull=True) | Q(computed_subdomain=""),
            organization__country=get_country(country),
            organization__type=get_organization_type(layer),
        )
        .values_list("url", flat=True)
    )

    urls = list(set(urls))

    return urls


def create_filename(set_name, country: str = "NL", organization_type="municipality"):
    country = get_country(country)

    organization_type_name = OrganizationType.objects.filter(name=organization_type).values("name").first()
    if not organization_type_name:
        organization_type_name = "municipality"
    else:
        organization_type_name = organization_type_name.get("name")

    filename = "%s_%s_%s_%s" % (country, organization_type_name, set_name, timezone.datetime.now().date())

    return filename


def export_urls_only(country: str = "NL", organization_type="municipality"):
    # todo: reuse the column definition for both the headers and the values_list below.
    columns = ["id", "url", "not_resolvable", "is_dead", "computed_subdomain", "computed_domain", "computed_suffix"]

    queryset = (
        Url.objects.all()
        .filter(
            is_dead=False,
            not_resolvable=False,
            organization__is_dead=False,
            organization__country=get_country(country),
            organization__type=get_organization_type(organization_type),
        )
        .values_list(
            "id", "url", "not_resolvable", "is_dead", "computed_subdomain", "computed_domain", "computed_suffix"
        )
    )

    return generic_export(queryset, columns)


def get_vulnerability_graph(country, organization_type, weeks_back):
    organization_type_id = get_organization_type(organization_type)
    country = get_country(country)
    when = timezone.now() - relativedelta(weeks=int(weeks_back))

    one_year_ago = when - timedelta(days=365)

    data = (
        VulnerabilityStatistic.objects.all()
        .filter(
            organization_type=organization_type_id,
            country=country,
            at_when__lte=when,
            at_when__gte=one_year_ago,
        )
        .order_by("scan_type", "at_when")
    )

    """
    Desired output:
      "security_headers_x_frame_options": [
        {"date": "2018-07-17", "high": 0, "medium": 3950, "low": 0},
        {"date": "2018-07-24", "high": 0, "medium": 2940, "low": 0},
      ]
    """

    stats = {}

    for statistic in data:
        if statistic.scan_type not in stats:
            stats[statistic.scan_type] = []

        stats[statistic.scan_type].append(
            {
                "high": statistic.high,
                "medium": statistic.medium,
                "low": statistic.low,
                "date": statistic.at_when.isoformat(),
                "urls": statistic.urls,
                "ok_urls": statistic.ok_urls,
                "endpoints": statistic.endpoints,
                "ok_endpoints": statistic.ok_endpoints,
                "ok": statistic.ok,
            }
        )

    return stats


def what_to_improve(country: str, organization_type: str, issue_type: str):
    # todo: check if the layer is published.
    policy = POLICY.get(issue_type, None)
    if not policy:
        log.debug(f"No policy found for {issue_type}")
        return []

    country = get_country(country)
    organization_type = get_organization_type(organization_type)

    if issue_type in URL_SCAN_TYPES:
        return what_to_improve_ugs(country, organization_type, issue_type, policy)
    else:
        return what_to_improve_epgs(country, organization_type, issue_type, policy)


def get_organization_vulnerability_timeline_via_name(
    organization_name: str, organization_type: str = "", country: str = ""
):
    layer = get_organization_type(organization_type) if organization_type else get_default_layer()
    country = get_country(code=country) if country else get_default_country()

    organization = (
        Organization.objects.all()
        .filter(country=country, type=layer, name=organization_name, is_dead=False)
        .first()
    )

    if not organization:
        return {}

    return get_organization_vulnerability_timeline(organization.id)


def export_organizations(country: str = "NL", organization_type="municipality"):
    columns = ["id", "name", "type", "wikidata", "wikipedia", "twitter_handle"]

    query = (
        Organization.objects.all()
        .filter(country=get_country(country), type=get_organization_type(organization_type), is_dead=False)
        .values_list("id", "name", "type", "wikidata", "wikipedia", "twitter_handle")
    )

    # values() no longer determines which columns are relevant for the export; that has to be set on the
    # factory, which does not restrain the number of fields: all fields are included.
    # The export documentation is pretty terrible (there is none) and the code is obscure, so we're not
    # using it. Left here for reference, in case anyone decides that
    # from import_export.resources import modelresource_factory
    # is a wise choice to use. It's not / it hardly is.
    # exporter = modelresource_factory(query.model)
    # exporter.Meta.fields = ['id', 'name', 'type', 'wikidata', 'wikipedia', 'twitter_handle']
    # dataset = exporter().export(query)

    return generic_export(query, columns)


def get_top_win_data(country: str = "NL", organization_type="municipality", weeks_back=0):
    """
    We can't use Django's objects.raw() parameter syntax here. It raises
    OperationalError: near "%": syntax error, probably because we use a multiline string, even though
    the manual says it is supported. https://code.djangoproject.com/ticket/10070

    Dictionary params are not supported with the SQLite backend; with that backend parameters must be
    passed as a list. Doing it that way raises "format requires a mapping" instead. So we fall back to
    plain string formatting and rely on the input checks we perform ourselves.

    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """
    when = get_when(weeks_back)

    cursor = connection.cursor()

    sql = """
        SELECT
            low,
            organization.name,
            organizations_organizationtype.name,
            organization.id,
            at_when,
            organization.twitter_handle,
            high,
            medium,
            low,
            total_urls,
            total_endpoints,
            organization.is_dead
        FROM map_organizationreport
        INNER JOIN
            organization on organization.id = map_organizationreport.organization_id
        INNER JOIN
            organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN
            coordinate ON coordinate.organization_id = organization.id
        INNER JOIN
            (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as org2 ON org2.id = or2.organization_id
                WHERE
                    at_when <= '%(when)s'
                    AND org2.country = '%(country)s'
                    AND org2.type_id = '%(OrganizationTypeId)s'
                GROUP BY organization_id
            ) as stacked_organization_report
            ON stacked_organization_report.id2 = map_organizationreport.id
        WHERE
            (
                ('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
                 AND organization.is_dead = 1)
                OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0)
            )
            AND organization.type_id = '%(OrganizationTypeId)s'
            AND organization.country = '%(country)s'
            AND total_urls > 0
        GROUP BY organization.name
        HAVING high = 0 AND medium = 0
        ORDER BY low ASC, total_endpoints DESC, organization.name ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()

    return rows_to_dataset(rows, when)


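# Hedged sketch only: if parameter binding ever becomes workable on all supported backends, the raw
# queries above could pass their values to cursor.execute() and let the database driver do the quoting,
# instead of %-formatting the SQL string. The '%s' placeholder style below is what the MySQL and
# PostgreSQL drivers expect (SQLite would need '?'), and the helper name and the simplified query are
# illustrative, not part of this module's API.
def _example_parameterized_latest_report_ids(when, country, organization_type_id):
    cursor = connection.cursor()
    cursor.execute(
        """
        SELECT MAX(or2.id)
        FROM map_organizationreport or2
        INNER JOIN organization as org2 ON org2.id = or2.organization_id
        WHERE at_when <= %s AND org2.country = %s AND org2.type_id = %s
        GROUP BY organization_id
        """,
        [when, country, organization_type_id],
    )
    return cursor.fetchall()

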
def get_map_data(country: str = "NL", organization_type: str = "municipality", days_back: int = 0,
                 displayed_issue: str = None):
    # A bug in the live version (possibly due to |safe) passes a Country(code='NL') object instead of the
    # country code itself. It is worked around here, but something upstream is wrong and should be fixed.
    if hasattr(country, "code"):
        country = country.code

    when = datetime.now(pytz.utc) - relativedelta(days=int(days_back))

    desired_url_scans = []
    desired_endpoint_scans = []

    if displayed_issue in URL_SCAN_TYPES:
        desired_url_scans += [displayed_issue]

    if displayed_issue in ENDPOINT_SCAN_TYPES:
        desired_endpoint_scans += [displayed_issue]

    # fall back to "all" when no specific issue is requested, which is the default.
    if not desired_url_scans and not desired_endpoint_scans:
        desired_url_scans = URL_SCAN_TYPES
        desired_endpoint_scans = ENDPOINT_SCAN_TYPES
        filters = ["all"]
    else:
        filters = desired_url_scans + desired_endpoint_scans

    cached = get_cached_map_data(country, organization_type, days_back, filters)
    if cached:
        return cached

    """
    Returns a json structure containing all current map data. This is used by the client to render the map.
    Renditions of this dataset might be pushed to gitlab automatically.

    :return:
    """

    data = {
        "metadata": {
            "type": "FeatureCollection",
            "render_date": datetime.now(pytz.utc).isoformat(),
            "data_from_time": when.isoformat(),
            "remark": remark,
            "applied filter": displayed_issue,
            "layer": organization_type,
            "country": country,
        },
        "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}},
        "features": [],
    }

    cursor = connection.cursor()

    # Sept 2019: MySQL has an issue with mediumtext fields. When they are joined and the query is not
    # optimized, the result takes two minutes to complete. Leave the mediumtext field out of the select
    # and the query finishes in a second. That is why two queries are used to retrieve map data from
    # the database.
sql = """ SELECT map_organizationreport.low, organization.name, organizations_organizationtype.name, coordinate_stack.area, coordinate_stack.geoJsonType, organization.id, map_organizationreport.id as organization_report_id, map_organizationreport.high, map_organizationreport.medium, map_organizationreport.low, map_organizationreport.total_issues, map_organizationreport.total_urls, map_organizationreport.high_urls, map_organizationreport.medium_urls, map_organizationreport.low_urls, coordinate_stack.stacked_coordinate_id FROM map_organizationreport INNER JOIN (SELECT stacked_organization.id as stacked_organization_id FROM organization stacked_organization WHERE ( stacked_organization.created_on <= '%(when)s' AND stacked_organization.is_dead = 0 AND stacked_organization.type_id=%(OrganizationTypeId)s AND stacked_organization.country='%(country)s' ) OR ( '%(when)s' BETWEEN stacked_organization.created_on AND stacked_organization.is_dead_since AND stacked_organization.is_dead = 1 AND stacked_organization.type_id=%(OrganizationTypeId)s AND stacked_organization.country='%(country)s' )) as organization_stack ON organization_stack.stacked_organization_id = map_organizationreport.organization_id INNER JOIN organization on organization.id = stacked_organization_id INNER JOIN organizations_organizationtype on organizations_organizationtype.id = organization.type_id INNER JOIN (SELECT MAX(stacked_coordinate.id) as stacked_coordinate_id, area, geoJsonType, organization_id FROM coordinate stacked_coordinate INNER JOIN organization filter_organization ON (stacked_coordinate.organization_id = filter_organization.id) WHERE ( stacked_coordinate.created_on <= '%(when)s' AND stacked_coordinate.is_dead = 0 AND filter_organization.country='%(country)s' AND filter_organization.type_id=%(OrganizationTypeId)s ) OR ('%(when)s' BETWEEN stacked_coordinate.created_on AND stacked_coordinate.is_dead_since AND stacked_coordinate.is_dead = 1 AND filter_organization.country='%(country)s' AND filter_organization.type_id=%(OrganizationTypeId)s ) GROUP BY area, organization_id ) as coordinate_stack ON coordinate_stack.organization_id = map_organizationreport.organization_id INNER JOIN (SELECT MAX(map_organizationreport.id) as stacked_organizationrating_id FROM map_organizationreport INNER JOIN organization filter_organization2 ON (filter_organization2.id = map_organizationreport.organization_id) WHERE at_when <= '%(when)s' AND filter_organization2.country='%(country)s' AND filter_organization2.type_id=%(OrganizationTypeId)s GROUP BY organization_id ) as stacked_organizationrating ON stacked_organizationrating.stacked_organizationrating_id = map_organizationreport.id WHERE organization.type_id = '%(OrganizationTypeId)s' AND organization.country= '%(country)s' GROUP BY coordinate_stack.area, organization.name ORDER BY map_organizationreport.at_when ASC """ % { "when": when, "OrganizationTypeId": get_organization_type(organization_type), "country": get_country(country), } cursor.execute(sql) rows = cursor.fetchall() needed_reports = [] for i in rows: # prevent sequence item 0: expected str instance, int found needed_reports.append(str(i[6])) reports = get_reports_by_ids(needed_reports) # todo: http://www.gadzmo.com/python/using-pythons-dictcursor-in-mysql-to-return-a-dict-with-keys/ # unfortunately numbered results are used. There is no decent solution for sqlite and the column to dict # translation is somewhat hairy. A rawquery would probably be better if possible. 
    for i in rows:

        # Here we're going to do something stupid: rebuild the high, medium, low classification based on
        # scan_types. It's somewhat insane to do it like this, but it's also insane to keep adding columns
        # for each vulnerability that's added to the system. This solution is a bit slow, but given the
        # caching it doesn't hurt too much.
        # Also: we've optimized for calculation in the past, but we're not even using it until now. So that
        # part of this code is pretty optimized :)

        # This feature is created to give an instant overview of which issues are where, which leads to more
        # clicks on reports.
        # The caching of this url should be decent, as people want to click fast. Filtering on the client
        # would be possible using the calculation field; perhaps that should be the way. Yet then we have to
        # do the filtering in javascript, which is error prone (todo: this will be done in the future, as it
        # responds faster, but it also means an enormous increase of data sent to the client).
        # It's actually reasonably fast.
        high, medium, low, ok = 0, 0, 0, 0
        calculation = json.loads(reports[i[6]])

        for url in calculation["organization"]["urls"]:
            for url_rating in url["ratings"]:
                if (
                    url_rating["type"] in desired_url_scans
                    and url_rating.get("comply_or_explain_valid_at_time_of_report", False) is False
                ):
                    high += url_rating["high"]
                    medium += url_rating["medium"]
                    low += url_rating["low"]
                    ok += url_rating["ok"]

            # it's possible the url doesn't have ratings.
            for endpoint in url["endpoints"]:
                for endpoint_rating in endpoint["ratings"]:
                    if (
                        endpoint_rating["type"] in desired_endpoint_scans
                        and endpoint_rating.get("comply_or_explain_valid_at_time_of_report", False) is False
                    ):
                        high += endpoint_rating["high"]
                        medium += endpoint_rating["medium"]
                        low += endpoint_rating["low"]
                        ok += endpoint_rating["ok"]

        # figure out if red, orange or green:
        # #162, only make things red if there is a critical issue.
        # removed json parsing of the calculation. This saves time.
        # no contents, no endpoint ever mentioned in any url (which is a standard attribute)
        if "total_urls" not in calculation["organization"] or not calculation["organization"]["total_urls"]:
            severity = "unknown"
        else:
            # things have to be OK in order to be colored. If it's all empty... then it's not OK.
            severity = "high" if high else "medium" if medium else "low" if low else "good" if ok else "unknown"

        dataset = {
            "type": "Feature",
            "properties": {
                "organization_id": i[5],
                "organization_type": i[2],
                "organization_name": i[1],
                "organization_name_lowercase": i[1].lower(),
                "organization_slug": slugify(i[1]),
                "additional_keywords": extract_domains(calculation),
                "high": high,
                "medium": medium,
                "low": low,
                "data_from": when.isoformat(),
                "severity": severity,
                "total_urls": i[11],  # = 100%
                "high_urls": i[12],
                "medium_urls": i[13],
                "low_urls": i[14],
            },
            "geometry": {
                # the coordinate ID makes it easy to check if the geometry has changed shape/location.
                "coordinate_id": i[15],
                "type": i[4],
                # Sometimes the data is a string, sometimes it's a list. The admin interface might influence
                # this. The fastest would be to use a string, instead of loading some json.
                "coordinates": proper_coordinate(i[3], i[4]),
            },
        }

        # calculate some statistics, so the frontends do not have to...
        # prevent division by zero
        if i[11]:
            total_urls = int(i[11])
            high_urls = int(i[12])
            medium_urls = int(i[13])
            low_urls = int(i[14])
            dataset["properties"]["percentages"] = {
                "high_urls": round(high_urls / total_urls, 2) * 100,
                "medium_urls": round(medium_urls / total_urls, 2) * 100,
                "low_urls": round(low_urls / total_urls, 2) * 100,
                "good_urls": round((total_urls - (high_urls + medium_urls + low_urls)) / total_urls, 2) * 100,
            }
        else:
            dataset["properties"]["percentages"] = {
                "high_urls": 0,
                "medium_urls": 0,
                "low_urls": 0,
                "good_urls": 0,
            }

        data["features"].append(dataset)

    return data


def get_improvements(country, organization_type, weeks_back, weeks_duration):
    # todo: adjustable timespan
    # todo: adjustable weeks_back

    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)

    if not weeks_duration:
        weeks_duration = 1

    when = get_when(weeks_back)

    # Looks a lot like the graphs, but then just subtract/add some values and done (?)
    # Compare the first urlrating to the last urlrating, but do not include urls that don't exist anymore.
    sql = """
        SELECT reporting_urlreport.id as id, calculation
        FROM reporting_urlreport
        INNER JOIN (
            SELECT MAX(id) as id2
            FROM reporting_urlreport or2
            WHERE at_when <= '%(when)s'
            GROUP BY url_id
        ) as x ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }
    newest_urlratings = UrlReport.objects.raw(sql)

    # This of course doesn't work for the first day, as then we didn't measure everything yet
    # (and the ratings for several issues are 0).
    sql = """
        SELECT reporting_urlreport.id as id, calculation
        FROM reporting_urlreport
        INNER JOIN (
            SELECT MAX(id) as id2
            FROM reporting_urlreport or2
            WHERE at_when <= '%(when)s'
            GROUP BY url_id
        ) as x ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        """ % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }
    oldest_urlratings = UrlReport.objects.raw(sql)

    old_measurement = {}
    new_measurement = {}
    scan_types = []

    # stats for the newest; this should be made a function:
    for urlrating in newest_urlratings:

        # url level. Why are there reports without url ratings / with empty url ratings?
        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in new_measurement:
                    new_measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                new_measurement[rating["type"]]["high"] += rating["high"]
                new_measurement[rating["type"]]["medium"] += rating["medium"]
                new_measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in new_measurement:
                    new_measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                new_measurement[rating["type"]]["high"] += rating["high"]
                new_measurement[rating["type"]]["medium"] += rating["medium"]
                new_measurement[rating["type"]]["low"] += rating["low"]

    # and the oldest stats, which should use the same function:
    for urlrating in oldest_urlratings:

        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in old_measurement:
                    old_measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])
                old_measurement[rating["type"]]["high"] += rating["high"]
                old_measurement[rating["type"]]["medium"] += rating["medium"]
                old_measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in old_measurement:
                    old_measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                old_measurement[rating["type"]]["high"] += rating["high"]
                old_measurement[rating["type"]]["medium"] += rating["medium"]
                old_measurement[rating["type"]]["low"] += rating["low"]

    # and now some magic to see the changes in this timespan:
    changes = {}
    for scan_type in scan_types:
        if scan_type not in changes:
            changes[scan_type] = {}

        if scan_type not in old_measurement:
            old_measurement[scan_type] = {}

        if scan_type not in new_measurement:
            new_measurement[scan_type] = {}

        changes[scan_type] = {
            "old": {
                "date": timezone.now() - timedelta(days=(weeks_duration * 7)),
                "high": old_measurement[scan_type].get("high", 0),
                "medium": old_measurement[scan_type].get("medium", 0),
                "low": old_measurement[scan_type].get("low", 0),
            },
            "new": {
                "date": when,
                "high": new_measurement[scan_type].get("high", 0),
                "medium": new_measurement[scan_type].get("medium", 0),
                "low": new_measurement[scan_type].get("low", 0),
            },
            "improvements": {
                "high": old_measurement[scan_type].get("high", 0) - new_measurement[scan_type].get("high", 0),
                "medium": old_measurement[scan_type].get("medium", 0) - new_measurement[scan_type].get("medium", 0),
                "low": old_measurement[scan_type].get("low", 0) - new_measurement[scan_type].get("low", 0),
            },
        }

    # and now the overall changes, which is what everyone is coming for...
    for scan_type in scan_types:
        changes["overall"] = {
            "old": {
                "high": changes.get("overall", {}).get("old", {}).get("high", 0) + changes[scan_type]["old"]["high"],
                "medium": changes.get("overall", {}).get("old", {}).get("medium", 0)
                + changes[scan_type]["old"]["medium"],
                "low": changes.get("overall", {}).get("old", {}).get("low", 0) + changes[scan_type]["old"]["low"],
            },
            "new": {
                "high": changes.get("overall", {}).get("new", {}).get("high", 0) + changes[scan_type]["new"]["high"],
                "medium": changes.get("overall", {}).get("new", {}).get("medium", 0)
                + changes[scan_type]["new"]["medium"],
                "low": changes.get("overall", {}).get("new", {}).get("low", 0) + changes[scan_type]["new"]["low"],
            },
            "improvements": {
                "high": changes.get("overall", {}).get("improvements", {}).get("high", 0)
                + changes[scan_type]["improvements"]["high"],
                "medium": changes.get("overall", {}).get("improvements", {}).get("medium", 0)
                + changes[scan_type]["improvements"]["medium"],
                "low": changes.get("overall", {}).get("improvements", {}).get("low", 0)
                + changes[scan_type]["improvements"]["low"],
            },
        }

    return changes


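# Hedged sketch only: the two nearly identical aggregation loops in get_improvements above could be folded
# into a helper along these lines. The name and signature are illustrative and not part of the existing
# module; it mutates the passed-in `measurement` dict and `scan_types` list just like the loops above do.
def _sum_ratings_per_scan_type(urlratings, measurement, scan_types):
    """Adds the high/medium/low totals of every url and endpoint rating to `measurement`, per scan type."""
    for urlrating in urlratings:
        ratings = list(urlrating.calculation.get("ratings", []))
        for endpoint in urlrating.calculation.get("endpoints", []):
            ratings += endpoint["ratings"]

        for rating in ratings:
            if rating["type"] not in measurement:
                measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}
            if rating["type"] not in scan_types:
                scan_types.append(rating["type"])
            measurement[rating["type"]]["high"] += rating["high"]
            measurement[rating["type"]]["medium"] += rating["medium"]
            measurement[rating["type"]]["low"] += rating["low"]

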
def get_all_latest_scans(country, organization_type):

    dataset = {
        "scans": defaultdict(list),
        "render_date": timezone.now().isoformat(),
        "remark": remark,
    }

    filtered_organization_type = get_organization_type(organization_type)
    filtered_country = get_country(country)

    # Really get the latest, without double results that apply for multiple organizations.
    # Do not show anything that is dead, on any level.
    for scan_type in PUBLISHED_ENDPOINT_SCAN_TYPES:
        scans = (
            EndpointGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "endpoint",
                    filter=Q(
                        endpoint__is_dead=False,
                        endpoint__url__not_resolvable=False,
                        endpoint__url__is_dead=False,
                        endpoint__url__organization__is_dead=False,
                        endpoint__url__organization__country=filtered_country,
                        endpoint__url__organization__type_id=filtered_organization_type,
                    ),
                )
            )
            .filter(n_urls__gte=1)
            .order_by("-rating_determined_on")[0:6]
        )

        # log.debug(scans.query)

        for scan in scans:
            calculation = get_severity(scan)

            dataset["scans"][scan_type].append(
                {
                    "url": scan.endpoint.url.url,
                    "service": f"{scan.endpoint.protocol}/{scan.endpoint.port} (IPv{scan.endpoint.ip_version})",
                    "protocol": scan.endpoint.protocol,
                    "port": scan.endpoint.port,
                    "ip_version": scan.endpoint.ip_version,
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    for scan_type in PUBLISHED_URL_SCAN_TYPES:
        scans = (
            UrlGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "url",
                    filter=Q(
                        url__organization__is_dead=False,
                        url__organization__country=filtered_country,
                        url__organization__type_id=filtered_organization_type,
                        url__is_dead=False,
                        url__not_resolvable=False,
                    ),
                )
            )
            .filter(n_urls=1)
            .order_by("-rating_determined_on")[0:6]
        )

        for scan in scans:
            calculation = get_severity(scan)

            # url scans
            dataset["scans"][scan_type].append(
                {
                    "url": scan.url.url,
                    "service": f"{scan.url.url}",
                    "protocol": scan_type,
                    "port": "-",
                    "ip_version": "-",
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    return dataset


def get_ticker_data(
    country: str = "NL", organization_type: str = "municipality", weeks_back: int = 0, weeks_duration: int = 0
):
    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)

    # Gives ticker data of organizations, like in scrolling news banners:
    # Amsterdam 42 +1, 32 +2, 12 -, Zutphen 12 -3, 32 -1, 3 +3, etc.
    # This is done on organization level; it could be on URL level in the future (selecting more cool urls?),
    # but organizations are far more meaningful.

    if not weeks_duration:
        weeks_duration = 10

    when = get_when(weeks_back)

    # Looks a lot like the graphs, but then just subtract/add some values and done (?)
    # Compare the first urlrating to the last urlrating, but do not include urls that don't exist anymore.
    sql = """
        SELECT map_organizationreport.id as id, name, high, medium, low
        FROM map_organizationreport
        INNER JOIN (
            SELECT MAX(or2.id) as id2
            FROM map_organizationreport or2
            INNER JOIN organization as filter_organization ON (filter_organization.id = or2.organization_id)
            WHERE
                at_when <= '%(when)s'
                AND filter_organization.country='%(country)s'
                AND filter_organization.type_id=%(OrganizationTypeId)s
            GROUP BY organization_id
        ) as stacked_organizationreport ON stacked_organizationreport.id2 = map_organizationreport.id
        INNER JOIN organization ON map_organizationreport.organization_id = organization.id
        WHERE
            (
                ('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
                 AND organization.is_dead = 1)
                OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0)
            )
            AND organization.type_id = '%(OrganizationTypeId)s'
            AND organization.country = '%(country)s'
            AND total_urls > 0
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    newest_urlratings = list(OrganizationReport.objects.raw(sql))

    # This of course doesn't work for the first day, as then we didn't measure everything yet
    # (and the ratings for several issues are 0).
    sql = """
        SELECT map_organizationreport.id as id, name, high, medium, low
        FROM map_organizationreport
        INNER JOIN (
            SELECT MAX(or2.id) as id2
            FROM map_organizationreport or2
            INNER JOIN organization as filter_organization ON (filter_organization.id = or2.organization_id)
            WHERE
                at_when <= '%(when)s'
                AND filter_organization.country='%(country)s'
                AND filter_organization.type_id=%(OrganizationTypeId)s
            GROUP BY organization_id
        ) as stacked_organizationreport ON stacked_organizationreport.id2 = map_organizationreport.id
        INNER JOIN organization ON map_organizationreport.organization_id = organization.id
        WHERE
            (
                ('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
                 AND organization.is_dead = 1)
                OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0)
            )
            AND organization.type_id = '%(OrganizationTypeId)s'
            AND organization.country = '%(country)s'
            AND total_urls > 0
        """ % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    oldest_urlratings = list(OrganizationReport.objects.raw(sql))

    # Create a dict keyed by organization name, pointing to the ratings. This makes it easy to match the
    # correct ones and to handle missing oldest ratings, for example.
    oldest_urlratings_dict = {}
    for oldest_urlrating in oldest_urlratings:
        oldest_urlratings_dict[oldest_urlrating.name] = oldest_urlrating

    # Unsuccessful rebuild? Or not enough organizations?
    if not newest_urlratings:
        return {"changes": {}, "slogan": config.TICKER_SLOGAN}

    changes = []
    for newest_urlrating in newest_urlratings:

        try:
            matching_oldest = oldest_urlratings_dict[newest_urlrating.name]
        except KeyError:
            matching_oldest = None

        if not matching_oldest:
            high_then = medium_then = low_then = "-"
            high_changes = newest_urlrating.high
            medium_changes = newest_urlrating.medium
            low_changes = newest_urlrating.low
        else:
            high_then = matching_oldest.high
            medium_then = matching_oldest.medium
            low_then = matching_oldest.low

            high_changes = newest_urlrating.high - matching_oldest.high
            medium_changes = newest_urlrating.medium - matching_oldest.medium
            low_changes = newest_urlrating.low - matching_oldest.low

        change = {
            "organization": newest_urlrating.name,
            "high_now": newest_urlrating.high,
            "medium_now": newest_urlrating.medium,
            "low_now": newest_urlrating.low,
            "high_then": high_then,
            "medium_then": medium_then,
            "low_then": low_then,
            "high_changes": high_changes,
            "medium_changes": medium_changes,
            "low_changes": int(low_changes),
        }

        changes.append(change)

    data = {"changes": changes, "slogan": config.TICKER_SLOGAN}

    return data


def get_all_explains(country, organization_type, limit=0):
    limit = 999999 if limit == 0 else limit

    country = get_country(country)
    organization_type = get_organization_type(organization_type)

    # Some urls are used by a lot of organizations; we only need the distinct explanation, and the
    # organizations will be matched later.
    # https://stackoverflow.com/questions/30752268/how-to-filter-objects-for-count-annotation-in-django

    # We're currently not taking into account which endpoints, urls and organizations are alive at a certain
    # point in time. In that sense history is ignored: only what is relevant _NOW_ counts.
    # This should change, adding some nice complex Q conditions for checking a certain date, something like:
    # & ( Q(url__organization__is_dead=False, created_on__lte=some_date)
    #     | Q(is_dead=True, created_on__lte=some_date, is_dead_since__gte=some_date))
    # & ( Q(is_dead=False, created_on__lte=some_date)
    #     | Q(is_dead=True, created_on__lte=some_date, is_dead_since__gte=some_date))

    ugss = (
        UrlGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "url",
                filter=Q(
                    url__organization__is_dead=False,
                    url__organization__country=country,
                    url__organization__type_id=organization_type,
                    url__is_dead=False,
                    url__not_resolvable=False,
                ),
            )
        )
        .filter(n_urls=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    egss = (
        EndpointGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "endpoint",
                filter=Q(
                    endpoint__is_dead=False,
                    endpoint__url__not_resolvable=False,
                    endpoint__url__is_dead=False,
                    endpoint__url__organization__is_dead=False,
                    endpoint__url__organization__country=country,
                    endpoint__url__organization__type_id=organization_type,
                ),
            )
        )
        .filter(n_urls__gte=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    explains = []

    for scan in ugss:
        explains.append(get_explanation("url", scan))

    for scan in egss:
        explains.append(get_explanation("endpoint", scan))

    # newest explanations first
    explains = sorted(explains, key=lambda k: (k["explained_on"]), reverse=True)

    return explains


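# Hedged sketch only: one way the point-in-time filtering described in the comment in get_all_explains
# could look. The helper name, the `some_date` parameter and the exact field paths are illustrative; the
# real model fields would need to be verified before using anything like this.
def _example_alive_at_filter(some_date):
    """Builds a Q filter for url scans whose organization existed (or had not yet died) at `some_date`."""
    return Q(url__organization__is_dead=False, url__organization__created_on__lte=some_date) | Q(
        url__organization__is_dead=True,
        url__organization__created_on__lte=some_date,
        url__organization__is_dead_since__gte=some_date,
    )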