Example #1
def get_lastest_map_health_data(country: str = "NL", organization_type: str = "municipality") -> Dict[str, Any]:
    map_configuration = (
        Configuration.objects.all()
        .filter(country=get_country(country), organization_type=get_organization_type(organization_type))
        .first()
    )

    if not map_configuration:
        return {
            "outdate_period_in_hours": -1,
            "percentage_up_to_date": -1,
            "percentage_out_of_date": -1,
            "amount_up_to_date": -1,
            "amount_out_of_date": -1,
            "per_scan": [],
        }
    try:
        mh = MapHealthReport.objects.all().filter(map_configuration=map_configuration).latest("at_when")
    except MapHealthReport.DoesNotExist:
        return {
            "outdate_period_in_hours": -1,
            "percentage_up_to_date": -1,
            "percentage_out_of_date": -1,
            "amount_up_to_date": -1,
            "amount_out_of_date": -1,
            "per_scan": [],
        }

    return mh.detailed_report
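
The same fallback dictionary is built twice above. A minimal sketch of how that duplication could be hoisted out, assuming the module's existing imports (the constant name EMPTY_MAP_HEALTH is made up here and is not part of the project):

# Hypothetical refactor, not part of the original code: one shared fallback keeps
# both early returns in sync; a copy is returned so callers cannot mutate it.
EMPTY_MAP_HEALTH: Dict[str, Any] = {
    "outdate_period_in_hours": -1,
    "percentage_up_to_date": -1,
    "percentage_out_of_date": -1,
    "amount_up_to_date": -1,
    "amount_out_of_date": -1,
    "per_scan": [],
}

# inside get_lastest_map_health_data, both fallbacks then become:
#     return dict(EMPTY_MAP_HEALTH)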
Example #2
def get_top_fail_data(country: str = "NL", organization_type="municipality", weeks_back=0):

    when = get_when(weeks_back)
    cursor = connection.cursor()

    sql = """
            SELECT
                low,
                organization.name,
                organizations_organizationtype.name,
                organization.id,
                at_when,
                organization.twitter_handle,
                high,
                medium,
                low,
                total_urls,
                total_endpoints
            FROM map_organizationreport
            INNER JOIN
              organization on organization.id = map_organizationreport.organization_id
            INNER JOIN
              organizations_organizationtype on organizations_organizationtype.id = organization.type_id
            INNER JOIN
              coordinate ON coordinate.organization_id = organization.id
            INNER JOIN
              (
                SELECT MAX(or2.id) as id2 FROM map_organizationreport or2
                INNER JOIN organization as org2 ON org2.id = or2.organization_id
                WHERE at_when <= '%(when)s'
                  AND org2.country = '%(country)s'
                  AND org2.type_id = '%(OrganizationTypeId)s'
                GROUP BY organization_id
              ) as stacked_organization_report
            ON stacked_organization_report.id2 = map_organizationreport.id
            WHERE
              (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
               AND organization.is_dead = 1
               ) OR (
               organization.created_on <= '%(when)s'
               AND organization.is_dead = 0
              ))
              AND organization.type_id = '%(OrganizationTypeId)s'
              AND organization.country = '%(country)s'
              AND total_urls > 0
            GROUP BY organization.name
            HAVING high > 0 or medium > 0
            ORDER BY high DESC, medium DESC, organization.name ASC
            """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    return rows_to_dataset(rows, when)
Example #3
def get_organization_report_by_name(
    country: str = DEFAULT_COUNTRY, organization_type=DEFAULT_LAYER, organization_name=None, weeks_back=0
):

    organization = Organization.objects.filter(
        computed_name_slug=slugify(organization_name),
        country=get_country(country),
        type=get_organization_type(organization_type),
        is_dead=False,
    ).first()

    log.debug("- %s %s %s " % (organization_name, get_country(country), get_organization_type(organization_type)))
    log.debug(organization)

    if not organization:
        return {}

    return get_organization_report_by_id(organization.pk, weeks_back)
Example #4
def export_coordinates(country: str = "NL", organization_type="municipality"):
    organizations = Organization.objects.all().filter(
        country=get_country(country),
        type=get_organization_type(organization_type))

    columns = ["id", "organization", "geojsontype", "area"]
    query = (Coordinate.objects.all().filter(
        organization__in=list(organizations),
        is_dead=False).values_list("id", "organization", "geojsontype",
                                   "area"))

    return generic_export(query, columns)
Example #5
def export_urls(country: str = "NL", organization_type="municipality"):
    organizations = Organization.objects.all().filter(
        country=get_country(country),
        type=get_organization_type(organization_type))

    columns = ["id", "url", "organization"]

    query = (Url.objects.all().filter(organization__in=list(organizations),
                                      is_dead=False,
                                      not_resolvable=False).values_list(
                                          "id", "url", "organization"))

    return generic_export(query, columns)
Example #6
def get_2ndlevel_domains(country, layer):
    """
    This is a starting point for SIDN to upload information for domains. This is basically a list of all
    2nd level domains for a layer. Data for this is uploaded.
    """
    urls = (Url.objects.all().filter(
        Q(computed_subdomain__isnull=True) | Q(computed_subdomain=""),
        organization__country=get_country(country),
        organization__type=get_organization_type(layer),
    ).values_list("url", flat=True))

    urls = list(set(urls))

    return urls
Example #7
def create_filename(set_name,
                    country: str = "NL",
                    organization_type="municipality"):
    country = get_country(country)
    organization_type_name = OrganizationType.objects.filter(
        name=organization_type).values("name").first()

    if not organization_type_name:
        organization_type_name = "municipality"
    else:
        organization_type_name = organization_type_name.get("name")

    filename = "%s_%s_%s_%s" % (country, organization_type_name, set_name,
                                timezone.datetime.now().date())
    return filename
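
For illustration, a hedged usage sketch of create_filename; the date suffix depends on when it runs, and the example assumes get_country returns the two-letter country code:

# Hypothetical usage (values are illustrative):
# create_filename("coordinates", country="NL", organization_type="municipality")
# -> "NL_municipality_coordinates_2019-09-01"   (the trailing part is today's date)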
Example #8
def export_urls_only(country: str = "NL", organization_type="municipality"):
    # todo: figure out how to reuse the column definition as both the headers and the values_list fields
    columns = [
        "id", "url", "not_resolvable", "is_dead", "computed_subdomain",
        "computed_domain", "computed_suffix"
    ]
    queryset = (Url.objects.all().filter(
        is_dead=False,
        not_resolvable=False,
        organization__is_dead=False,
        organization__country=get_country(country),
        organization__type=get_organization_type(organization_type),
    ).values_list("id", "url", "not_resolvable", "is_dead",
                  "computed_subdomain", "computed_domain", "computed_suffix"))
    return generic_export(queryset, columns)
Example #9
def get_vulnerability_graph(country, organization_type, weeks_back):

    organization_type_id = get_organization_type(organization_type)
    country = get_country(country)
    when = timezone.now() - relativedelta(weeks=int(weeks_back))

    one_year_ago = when - timedelta(days=365)

    data = (VulnerabilityStatistic.objects.all().filter(
        organization_type=organization_type_id,
        country=country,
        at_when__lte=when,
        at_when__gte=one_year_ago).order_by("scan_type", "at_when"))
    """
    Desired output:
      "security_headers_x_frame_options": [
        {
          "date": "2018-07-17",
          "high": 0,
          "medium": 3950,
          "low": 0
        },
        {
          "date": "2018-07-24",
          "high": 0,
          "medium": 2940,
          "low": 0
        },
    """
    stats = {}

    for statistic in data:
        if statistic.scan_type not in stats:
            stats[statistic.scan_type] = []

        stats[statistic.scan_type].append({
            "high": statistic.high,
            "medium": statistic.medium,
            "low": statistic.low,
            "date": statistic.at_when.isoformat(),
            "urls": statistic.urls,
            "ok_urls": statistic.ok_urls,
            "endpoints": statistic.endpoints,
            "ok_endpoints": statistic.ok_endpoints,
            "ok": statistic.ok,
        })

    return stats
Example #10
def what_to_improve(country: str, organization_type: str, issue_type: str):
    # todo: check if the layer is published.

    policy = POLICY.get(issue_type, None)
    if not policy:
        log.debug(f"No policy found for {issue_type}")
        return []

    country = get_country(country)
    organization_type = get_organization_type(organization_type)

    if issue_type in URL_SCAN_TYPES:
        return what_to_improve_ugs(country, organization_type, issue_type,
                                   policy)
    else:
        return what_to_improve_epgs(country, organization_type, issue_type,
                                    policy)
Example #11
def get_organization_vulnerability_timeline_via_name(
        organization_name: str,
        organization_type: str = "",
        country: str = ""):

    layer = get_organization_type(
        organization_type) if organization_type else get_default_layer()
    country = get_country(code=country) if country else get_default_country()

    organization = (Organization.objects.all().filter(country=country,
                                                      type=layer,
                                                      name=organization_name,
                                                      is_dead=False).first())

    if not organization:
        return {}

    return get_organization_vulnerability_timeline(organization.id)
Example #12
def export_organizations(country: str = "NL",
                         organization_type="municipality"):
    columns = ["id", "name", "type", "wikidata", "wikipedia", "twitter_handle"]
    query = (Organization.objects.all().filter(
        country=get_country(country),
        type=get_organization_type(organization_type),
        is_dead=False).values_list("id", "name", "type", "wikidata",
                                   "wikipedia", "twitter_handle"))
    # values() no longer works to determine which columns are relevant for the export;
    # we would need to set that on the factory.

    # That approach does not restrict the number of fields: all fields are included.
    # The export documentation is pretty terrible (there is none) and the code is obscure, so we're not using it.
    # Left here for reference, in case anyone decides that
    # from import_export.resources import modelresource_factory
    # is a wise choice to use. It's not, or hardly is.
    # exporter = modelresource_factory(query.model)
    # exporter.Meta.fields = ['id', 'name', 'type', 'wikidata', 'wikipedia', 'twitter_handle']
    # dataset = exporter().export(query)

    return generic_export(query, columns)
Example #13
def get_top_win_data(country: str = "NL", organization_type="municipality", weeks_back=0):
    """
    We can't use Django's objects.raw() syntax here: it raises OperationalError: near "%": syntax error...
    It is probably not supported, although the manual says it is. Maybe because we use a multiline string?

    https://code.djangoproject.com/ticket/10070
    Dictionary params are not supported with the SQLite backend; with this backend, you must pass parameters as a list.

    Even when doing it the documented way, the exception "format requires a mapping" occurs. So we interpolate
    the values ourselves and rely on the checks we do on them.
    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """
    when = get_when(weeks_back)

    cursor = connection.cursor()
    sql = """
            SELECT
              low,
              organization.name,
              organizations_organizationtype.name,
              organization.id,
              at_when,
              organization.twitter_handle,
              high,
              medium,
              low,
              total_urls,
              total_endpoints,
              organization.is_dead
            FROM map_organizationreport
            INNER JOIN
              organization on organization.id = map_organizationreport.organization_id
            INNER JOIN
              organizations_organizationtype on organizations_organizationtype.id = organization.type_id
            INNER JOIN
              coordinate ON coordinate.organization_id = organization.id
            INNER JOIN
              (
                SELECT MAX(or2.id) as id2 FROM map_organizationreport or2
                INNER JOIN organization as org2 ON org2.id = or2.organization_id
                WHERE at_when <= '%(when)s'
                  AND org2.country = '%(country)s'
                  AND org2.type_id = '%(OrganizationTypeId)s'
                GROUP BY organization_id
              ) as stacked_organization_report
            ON stacked_organization_report.id2 = map_organizationreport.id
            WHERE
              (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
               AND organization.is_dead = 1
               ) OR (
               organization.created_on <= '%(when)s'
               AND organization.is_dead = 0
              ))
              AND organization.type_id = '%(OrganizationTypeId)s'
              AND organization.country = '%(country)s'
              AND total_urls > 0
            GROUP BY organization.name
            HAVING high = 0 AND medium = 0
            ORDER BY low ASC, total_endpoints DESC, organization.name ASC
            """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    return rows_to_dataset(rows, when)
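
The docstring above explains why this query is built with %-interpolation rather than query parameters. For comparison, a minimal sketch of the positional-parameter form the linked Django ticket describes for SQLite; the shortened SQL below is illustrative, not the project's query:

# Hypothetical sketch: positional %s placeholders are filled by the database driver,
# so values do not have to be escaped or quoted by hand.
sql = """
    SELECT organization.name, high, medium, low
    FROM map_organizationreport
    INNER JOIN organization ON organization.id = map_organizationreport.organization_id
    WHERE at_when <= %s
      AND organization.country = %s
      AND organization.type_id = %s
    """
cursor.execute(sql, [when, get_country(country), get_organization_type(organization_type)])
rows = cursor.fetchall()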
Example #14
def get_map_data(country: str = "NL",
                 organization_type: str = "municipality",
                 days_back: int = 0,
                 displayed_issue: str = None):

    # A bug in the live version (possibly due to |safe) passes a Country(code='NL') object instead of
    # the country code string. That is worked around here, but something upstream is wrong and should be fixed.
    if hasattr(country, "code"):
        country = country.code

    when = datetime.now(pytz.utc) - relativedelta(days=int(days_back))

    desired_url_scans = []
    desired_endpoint_scans = []

    if displayed_issue in URL_SCAN_TYPES:
        desired_url_scans += [displayed_issue]

    if displayed_issue in ENDPOINT_SCAN_TYPES:
        desired_endpoint_scans += [displayed_issue]

    # fallback: if no issue is selected, show "all", which is the default.
    if not desired_url_scans and not desired_endpoint_scans:
        desired_url_scans = URL_SCAN_TYPES
        desired_endpoint_scans = ENDPOINT_SCAN_TYPES

        filters = ["all"]
    else:
        filters = desired_url_scans + desired_endpoint_scans

    cached = get_cached_map_data(country, organization_type, days_back,
                                 filters)

    if cached:
        return cached
    """
    Returns a json structure containing all current map data.
    This is used by the client to render the map.

    Renditions of this dataset might be pushed to gitlab automatically.

    :return:
    """

    data = {
        "metadata": {
            "type": "FeatureCollection",
            "render_date": datetime.now(pytz.utc).isoformat(),
            "data_from_time": when.isoformat(),
            "remark": remark,
            "applied filter": displayed_issue,
            "layer": organization_type,
            "country": country,
        },
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
            }
        },
        "features": [],
    }

    cursor = connection.cursor()

    # Sept 2019: MySQL has an issue with mediumtext fields. When they are joined in and the query is not optimized,
    # the result takes 2 minutes to complete. If the mediumtext field is not selected, the query finishes in a second.
    # That is why two queries are used to retrieve map data from the database.
    sql = """
        SELECT
            map_organizationreport.low,
            organization.name,
            organizations_organizationtype.name,
            coordinate_stack.area,
            coordinate_stack.geoJsonType,
            organization.id,
            map_organizationreport.id as organization_report_id,
            map_organizationreport.high,
            map_organizationreport.medium,
            map_organizationreport.low,
            map_organizationreport.total_issues,
            map_organizationreport.total_urls,
            map_organizationreport.high_urls,
            map_organizationreport.medium_urls,
            map_organizationreport.low_urls,
            coordinate_stack.stacked_coordinate_id
        FROM map_organizationreport
        INNER JOIN


          (SELECT stacked_organization.id as stacked_organization_id
          FROM organization stacked_organization
          WHERE (
            stacked_organization.created_on <= '%(when)s'
            AND stacked_organization.is_dead = 0
            AND stacked_organization.type_id=%(OrganizationTypeId)s
            AND stacked_organization.country='%(country)s'
            )
          OR (
          '%(when)s' BETWEEN stacked_organization.created_on AND stacked_organization.is_dead_since
            AND stacked_organization.is_dead = 1
            AND stacked_organization.type_id=%(OrganizationTypeId)s
            AND stacked_organization.country='%(country)s'
          )) as organization_stack
          ON organization_stack.stacked_organization_id = map_organizationreport.organization_id


        INNER JOIN
          organization on organization.id = stacked_organization_id
        INNER JOIN
          organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN


          (SELECT MAX(stacked_coordinate.id) as stacked_coordinate_id, area, geoJsonType, organization_id
          FROM coordinate stacked_coordinate
          INNER JOIN organization filter_organization
            ON (stacked_coordinate.organization_id = filter_organization.id)
          WHERE (
            stacked_coordinate.created_on <= '%(when)s'
            AND stacked_coordinate.is_dead = 0
            AND filter_organization.country='%(country)s'
            AND filter_organization.type_id=%(OrganizationTypeId)s
            )
          OR
            ('%(when)s' BETWEEN stacked_coordinate.created_on AND stacked_coordinate.is_dead_since
            AND stacked_coordinate.is_dead = 1
            AND filter_organization.country='%(country)s'
            AND filter_organization.type_id=%(OrganizationTypeId)s
            ) GROUP BY area, organization_id
          ) as coordinate_stack
          ON coordinate_stack.organization_id = map_organizationreport.organization_id


        INNER JOIN


          (SELECT MAX(map_organizationreport.id) as stacked_organizationrating_id
          FROM map_organizationreport
          INNER JOIN organization filter_organization2
            ON (filter_organization2.id = map_organizationreport.organization_id)
          WHERE at_when <= '%(when)s'
          AND filter_organization2.country='%(country)s'
          AND filter_organization2.type_id=%(OrganizationTypeId)s
          GROUP BY organization_id
          ) as stacked_organizationrating
          ON stacked_organizationrating.stacked_organizationrating_id = map_organizationreport.id


        WHERE organization.type_id = '%(OrganizationTypeId)s' AND organization.country= '%(country)s'
        GROUP BY coordinate_stack.area, organization.name
        ORDER BY map_organizationreport.at_when ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    cursor.execute(sql)
    rows = cursor.fetchall()

    needed_reports = []
    for i in rows:
        # prevent sequence item 0: expected str instance, int found
        needed_reports.append(str(i[6]))

    reports = get_reports_by_ids(needed_reports)

    # todo: http://www.gadzmo.com/python/using-pythons-dictcursor-in-mysql-to-return-a-dict-with-keys/
    # Unfortunately positional (numbered) results are used. There is no decent solution for sqlite, and the
    # column-to-dict translation is somewhat hairy. A raw query would probably be better if possible.

    for i in rows:

        # Here we're going to do something stupid: rebuild the high, medium, low classification based on scan_types.
        # It's somewhat insane to do it like this, but it's also insane to keep adding columns for each vulnerability
        # that's added to the system. This solution will be a bit slow, but given the caching and such it won't
        # hurt too much.
        # Also: we've optimized for calculation in the past, but we're not even using that until now. So that part of
        # this code is pretty optimized :)
        # This feature is created to give an instant overview of what issues are where, which will lead to more clicks
        # on reports.
        # The caching of this url should be decent, as people want to click fast. Filtering on the client
        # would be possible using the calculation field; perhaps that should be the way. But then we'd have to do
        # the filtering in javascript, which is error prone (todo: this will be done in the future, as it responds
        # faster, but it will also mean an enormous increase in data sent to the client).
        # It's actually reasonably fast.
        high, medium, low, ok = 0, 0, 0, 0

        calculation = json.loads(reports[i[6]])

        for url in calculation["organization"]["urls"]:
            for url_rating in url["ratings"]:
                if (url_rating["type"] in desired_url_scans and url_rating.get(
                        "comply_or_explain_valid_at_time_of_report", False) is
                        False):
                    high += url_rating["high"]
                    medium += url_rating["medium"]
                    low += url_rating["low"]
                    ok += url_rating["ok"]

            # it's possible the url doesn't have ratings.
            for endpoint in url["endpoints"]:
                for endpoint_rating in endpoint["ratings"]:
                    if (endpoint_rating["type"] in desired_endpoint_scans
                            and endpoint_rating.get(
                                "comply_or_explain_valid_at_time_of_report",
                                False) is False):
                        high += endpoint_rating["high"]
                        medium += endpoint_rating["medium"]
                        low += endpoint_rating["low"]
                        ok += endpoint_rating["ok"]

        # figure out if red, orange or green:
        # #162, only make things red if there is a critical issue.
        # removed json parsing of the calculation. This saves time.
        # no contents, no endpoint ever mentioned in any url (which is a standard attribute)
        if "total_urls" not in calculation["organization"] or not calculation[
                "organization"]["total_urls"]:
            severity = "unknown"
        else:
            # things have to be OK in order to be colored. If it's all empty... then it's not OK.
            severity = "high" if high else "medium" if medium else "low" if low else "good" if ok else "unknown"

        dataset = {
            "type": "Feature",
            "properties": {
                "organization_id": i[5],
                "organization_type": i[2],
                "organization_name": i[1],
                "organization_name_lowercase": i[1].lower(),
                "organization_slug": slugify(i[1]),
                "additional_keywords": extract_domains(calculation),
                "high": high,
                "medium": medium,
                "low": low,
                "data_from": when.isoformat(),
                "severity": severity,
                "total_urls": i[11],  # = 100%
                "high_urls": i[12],
                "medium_urls": i[13],
                "low_urls": i[14],
            },
            "geometry": {
                # the coordinate ID makes it easy to check if the geometry has changed shape/location.
                "coordinate_id": i[15],
                "type": i[4],
                # Sometimes the data is a string, sometimes it's a list. The admin
                # interface might influence this. The fastest would be to use a string, instead of
                # loading some json.
                "coordinates": proper_coordinate(i[3], i[4]),
            },
        }

        # calculate some statistics, so the frontends do not have to...
        # prevent division by zero
        if i[11]:
            total_urls = int(i[11])
            high_urls = int(i[12])
            medium_urls = int(i[13])
            low_urls = int(i[14])
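            # note: round(fraction, 2) * 100 yields whole-percent steps (e.g. 0.333 -> 33.0),
            # because the fraction is rounded to two decimals before it is scaled to a percentage.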
            dataset["properties"]["percentages"] = {
                "high_urls":
                round(high_urls / total_urls, 2) * 100,
                "medium_urls":
                round(medium_urls / total_urls, 2) * 100,
                "low_urls":
                round(low_urls / total_urls, 2) * 100,
                "good_urls":
                round((total_urls -
                       (high_urls + medium_urls + low_urls)) / total_urls, 2) *
                100,
            }
        else:
            dataset["properties"]["percentages"] = {
                "high_urls": 0,
                "medium_urls": 0,
                "low_urls": 0,
                "good_urls": 0,
            }

        data["features"].append(dataset)

    return data
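
The Sept 2019 comment above explains why the heavy mediumtext calculation column is fetched in a second step. The project's get_reports_by_ids is not shown on this page; a minimal sketch of what such a second query could look like, assuming the calculation column lives on map_organizationreport and the ids come straight from the first query:

# Hypothetical sketch, not the project's implementation: fetch only the heavy
# 'calculation' column for the given report ids, keyed by report id.
def get_reports_by_ids(report_ids):
    if not report_ids:
        return {}
    cursor = connection.cursor()
    placeholders = ",".join(["%s"] * len(report_ids))
    cursor.execute(
        "SELECT id, calculation FROM map_organizationreport WHERE id IN (%s)" % placeholders,
        report_ids,
    )
    return {report_id: calculation for report_id, calculation in cursor.fetchall()}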
Example #15
def get_improvements(country, organization_type, weeks_back, weeks_duration):
    # todo: adjustable timespan
    # todo: adjustable weeks_back

    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)

    if not weeks_duration:
        weeks_duration = 1

    when = get_when(weeks_back)

    # looks a lot like the graphs code, but here we just subtract/add some values and are done (?)

    # compare the first urlrating to the last urlrating
    # but do not include urls that don't exist.

    sql = """
        SELECT
            reporting_urlreport.id as id,
            calculation
        FROM
            reporting_urlreport
        INNER JOIN
            (
                SELECT MAX(id) as id2 FROM reporting_urlreport or2
                WHERE at_when <= '%(when)s' GROUP BY url_id
            ) as x
        ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
            """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    newest_urlratings = UrlReport.objects.raw(sql)

    # this of course doesn't work for the first day, as back then we hadn't measured
    # everything yet (and the ratings for several issues are 0).
    sql = """
        SELECT reporting_urlreport.id as id, calculation FROM
            reporting_urlreport
        INNER JOIN
            (
                SELECT MAX(id) as id2 FROM reporting_urlreport or2
                WHERE at_when <= '%(when)s' GROUP BY url_id
            ) as x
        ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
            """ % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    oldest_urlratings = UrlReport.objects.raw(sql)

    old_measurement = {}
    new_measurement = {}
    scan_types = []

    # stats for the newest, should be made a function:
    for urlrating in newest_urlratings:

        # url level. Why are there reports without url ratings / with empty url ratings?
        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in new_measurement:
                    new_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                new_measurement[rating["type"]]["high"] += rating["high"]
                new_measurement[rating["type"]]["medium"] += rating["medium"]
                new_measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in new_measurement:
                    new_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                new_measurement[rating["type"]]["high"] += rating["high"]
                new_measurement[rating["type"]]["medium"] += rating["medium"]
                new_measurement[rating["type"]]["low"] += rating["low"]

    # and the oldest stats, which should be the same function
    for urlrating in oldest_urlratings:

        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in old_measurement:
                    old_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                old_measurement[rating["type"]]["high"] += rating["high"]
                old_measurement[rating["type"]]["medium"] += rating["medium"]
                old_measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in old_measurement:
                    old_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                old_measurement[rating["type"]]["high"] += rating["high"]
                old_measurement[rating["type"]]["medium"] += rating["medium"]
                old_measurement[rating["type"]]["low"] += rating["low"]

    # and now do some magic to see the changes in this timespan:
    changes = {}
    for scan_type in scan_types:
        if scan_type not in changes:
            changes[scan_type] = {}

        if scan_type not in old_measurement:
            old_measurement[scan_type] = {}

        if scan_type not in new_measurement:
            new_measurement[scan_type] = {}

        changes[scan_type] = {
            "old": {
                "date": timezone.now() - timedelta(days=(weeks_duration * 7)),
                "high": old_measurement[scan_type].get("high", 0),
                "medium": old_measurement[scan_type].get("medium", 0),
                "low": old_measurement[scan_type].get("low", 0),
            },
            "new": {
                "date": when,
                "high": new_measurement[scan_type].get("high", 0),
                "medium": new_measurement[scan_type].get("medium", 0),
                "low": new_measurement[scan_type].get("low", 0),
            },
            "improvements": {
                "high":
                old_measurement[scan_type].get("high", 0) -
                new_measurement[scan_type].get("high", 0),
                "medium":
                old_measurement[scan_type].get("medium", 0) -
                new_measurement[scan_type].get("medium", 0),
                "low":
                old_measurement[scan_type].get("low", 0) -
                new_measurement[scan_type].get("low", 0),
            },
        }

    # and now for overall changes, what everyone is coming for...
    for scan_type in scan_types:
        changes["overall"] = {
            "old": {
                "high":
                changes.get("overall", {}).get("old", {}).get("high", 0) +
                changes[scan_type]["old"]["high"],
                "medium":
                changes.get("overall", {}).get("old", {}).get("medium", 0) +
                changes[scan_type]["old"]["medium"],
                "low":
                changes.get("overall", {}).get("old", {}).get("low", 0) +
                changes[scan_type]["old"]["low"],
            },
            "new": {
                "high":
                changes.get("overall", {}).get("new", {}).get("high", 0) +
                changes[scan_type]["new"]["high"],
                "medium":
                changes.get("overall", {}).get("new", {}).get("medium", 0) +
                changes[scan_type]["new"]["medium"],
                "low":
                changes.get("overall", {}).get("new", {}).get("low", 0) +
                changes[scan_type]["new"]["low"],
            },
            "improvements": {
                "high":
                changes.get("overall", {}).get("improvements", {}).get(
                    "high", 0) + changes[scan_type]["improvements"]["high"],
                "medium":
                changes.get("overall", {}).get("improvements", {}).get(
                    "medium", 0) +
                changes[scan_type]["improvements"]["medium"],
                "low":
                changes.get("overall", {}).get("improvements", {}).get(
                    "low", 0) + changes[scan_type]["improvements"]["low"],
            },
        }

    return changes
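
The comments in get_improvements note twice that the per-rating aggregation "should be made a function". A minimal sketch of such a helper, covering both the url-level and endpoint-level ratings (the helper name is made up here and is not part of the project):

# Hypothetical helper, not in the original code: sum high/medium/low per scan type
# over a set of url reports, registering any scan types it encounters.
def summarize_ratings(urlratings, scan_types):
    measurement = {}
    for urlrating in urlratings:
        ratings = list(urlrating.calculation.get("ratings", []))
        for endpoint in urlrating.calculation.get("endpoints", []):
            ratings += endpoint["ratings"]
        for rating in ratings:
            if rating["type"] not in scan_types:
                scan_types.append(rating["type"])
            summary = measurement.setdefault(rating["type"], {"high": 0, "medium": 0, "low": 0})
            summary["high"] += rating["high"]
            summary["medium"] += rating["medium"]
            summary["low"] += rating["low"]
    return measurement

# the two loops above would then reduce to:
# new_measurement = summarize_ratings(newest_urlratings, scan_types)
# old_measurement = summarize_ratings(oldest_urlratings, scan_types)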
Example #16
def get_all_latest_scans(country, organization_type):

    dataset = {
        "scans": defaultdict(list),
        "render_date": timezone.now().isoformat(),
        "remark": remark,
    }

    filtered_organization_type = get_organization_type(organization_type)
    filtered_country = get_country(country)

    # Really get the latest, without double results that apply for multiple organizations.
    # Do not show anything that is dead, on any level.
    for scan_type in PUBLISHED_ENDPOINT_SCAN_TYPES:
        scans = (
            EndpointGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "endpoint",
                    filter=Q(
                        endpoint__is_dead=False,
                        endpoint__url__not_resolvable=False,
                        endpoint__url__is_dead=False,
                        endpoint__url__organization__is_dead=False,
                        endpoint__url__organization__country=filtered_country,
                        endpoint__url__organization__type_id=filtered_organization_type,
                    ),
                )
            )
            .filter(n_urls__gte=1)
            .order_by("-rating_determined_on")[0:6]
        )

        # print(scans.query)

        for scan in scans:
            calculation = get_severity(scan)

            dataset["scans"][scan_type].append(
                {
                    "url": scan.endpoint.url.url,
                    "service": f"{scan.endpoint.protocol}/{scan.endpoint.port} (IPv{scan.endpoint.ip_version})",
                    "protocol": scan.endpoint.protocol,
                    "port": scan.endpoint.port,
                    "ip_version": scan.endpoint.ip_version,
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    for scan_type in PUBLISHED_URL_SCAN_TYPES:

        scans = (
            UrlGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "url",
                    filter=Q(
                        url__organization__is_dead=False,
                        url__organization__country=filtered_country,
                        url__organization__type_id=filtered_organization_type,
                        url__is_dead=False,
                        url__not_resolvable=False,
                    ),
                )
            )
            .filter(n_urls=1)
            .order_by("-rating_determined_on")[0:6]
        )

        for scan in scans:
            calculation = get_severity(scan)

            # url scans
            dataset["scans"][scan_type].append(
                {
                    "url": scan.url.url,
                    "service": f"{scan.url.url}",
                    "protocol": scan_type,
                    "port": "-",
                    "ip_version": "-",
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    return dataset
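
The filtered Count annotation above is what restricts the "latest scans" to endpoints of living organizations in the selected country and layer, without duplicating rows per organization. In isolation the pattern looks like this (a generic sketch, not project code; filter= on aggregates requires Django 2.0+):

# Generic sketch of the filtered-aggregate pattern: count how many related objects
# match a condition, then keep only rows where at least one does.
from django.db.models import Count, Q

matching_scans = (
    EndpointGenericScan.objects.filter(is_the_latest_scan=True)
    .annotate(n_urls=Count("endpoint", filter=Q(endpoint__is_dead=False)))
    .filter(n_urls__gte=1)
)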
Example #17
def get_ticker_data(
    country: str = "NL", organization_type: str = "municipality", weeks_back: int = 0, weeks_duration: int = 0
):

    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)

    # Gives ticker data of organizations, like in news scrolling:
    # On organization level; could be on URL level in the future (selecting cooler urls?)
    # Organizations are far more meaningful.
    # Amsterdam 42 +1, 32 +2, 12 -, Zutphen 12 -3, 32 -1, 3 +3, etc.

    if not weeks_duration:
        weeks_duration = 10

    when = get_when(weeks_back)

    # looks a lot like the graphs code, but here we just subtract/add some values and are done (?)

    # compare the first urlrating to the last urlrating
    # but do not include urls that don't exist.

    sql = """
        SELECT
            map_organizationreport.id as id,
            name,
            high,
            medium,
            low
        FROM
            map_organizationreport
        INNER JOIN
           (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as filter_organization
                ON (filter_organization.id = or2.organization_id)
                WHERE
                    at_when <= '%(when)s'
                    AND filter_organization.country='%(country)s'
                    AND filter_organization.type_id=%(OrganizationTypeId)s
               GROUP BY organization_id
            ) as stacked_organizationreport
        ON stacked_organizationreport.id2 = map_organizationreport.id
        INNER JOIN organization ON map_organizationreport.organization_id = organization.id
        WHERE
        (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
           AND organization.is_dead = 1
           ) OR (
           organization.created_on <= '%(when)s'
           AND organization.is_dead = 0
        ))
        AND organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        AND total_urls > 0
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    newest_urlratings = list(OrganizationReport.objects.raw(sql))

    # this of course doesn't work for the first day, as back then we hadn't measured
    # everything yet (and the ratings for several issues are 0).
    sql = """
        SELECT
            map_organizationreport.id as id,
            name,
            high,
            medium,
            low
        FROM
               map_organizationreport
        INNER JOIN
            (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as filter_organization
                ON (filter_organization.id = or2.organization_id)
                WHERE
                    at_when <= '%(when)s'
                    AND filter_organization.country='%(country)s'
                    AND filter_organization.type_id=%(OrganizationTypeId)s
               GROUP BY organization_id
            ) as stacked_organizationreport
        ON stacked_organizationreport.id2 = map_organizationreport.id
        INNER JOIN organization ON map_organizationreport.organization_id = organization.id
        WHERE
        (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
               AND organization.is_dead = 1
               ) OR (
               organization.created_on <= '%(when)s'
               AND organization.is_dead = 0
        ))
        AND organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        AND total_urls > 0
        """ % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    oldest_urlratings = list(OrganizationReport.objects.raw(sql))

    # create a dict where the keys point to the ratings. This makes it easy to match the
    # correct ones, and to handle missing oldest ratings, for example.
    oldest_urlratings_dict = {}
    for oldest_urlrating in oldest_urlratings:
        oldest_urlratings_dict[oldest_urlrating.name] = oldest_urlrating

    # unsuccessful rebuild? Or not enough organizations?
    if not newest_urlratings:
        return {"changes": {}, "slogan": config.TICKER_SLOGAN}

    changes = []
    for newest_urlrating in newest_urlratings:

        try:
            matching_oldest = oldest_urlratings_dict[newest_urlrating.name]
        except KeyError:
            matching_oldest = None

        if not matching_oldest:
            high_then = medium_then = low_then = "-"
            high_changes = newest_urlrating.high
            medium_changes = newest_urlrating.medium
            low_changes = newest_urlrating.low

        else:
            high_then = matching_oldest.high
            medium_then = matching_oldest.medium
            low_then = matching_oldest.low
            high_changes = newest_urlrating.high - matching_oldest.high
            medium_changes = newest_urlrating.medium - matching_oldest.medium
            low_changes = newest_urlrating.low - matching_oldest.low

        change = {
            "organization": newest_urlrating.name,
            "high_now": newest_urlrating.high,
            "medium_now": newest_urlrating.medium,
            "low_now": newest_urlrating.low,
            "high_then": high_then,
            "medium_then": medium_then,
            "low_then": low_then,
            "high_changes": high_changes,
            "medium_changes": medium_changes,
            "low_changes": int(low_changes),
        }

        changes.append(change)

    data = {"changes": changes, "slogan": config.TICKER_SLOGAN}

    return data
Example #18
def get_all_explains(country, organization_type, limit=0):

    limit = 999999 if limit == 0 else limit

    country = get_country(country)
    organization_type = get_organization_type(organization_type)

    # some urls are used by a lot of organizations; we only need the distinct explanation, and
    # the organizations will be matched later.
    # https://stackoverflow.com/questions/30752268/how-to-filter-objects-for-count-annotation-in-django

    # we're currently not taking into account which endpoints, URLs and organizations are alive at a certain point.
    # in that sense we're not taking history into account... only what is relevant _NOW_.
    # This should change by adding some nice complex Q conditions for checking a certain date,
    # something like: & (  Q(url__organization__is_dead=False, created_on__lte=some_date)
    #                    | Q(is_dead=True,  created_on__lte=some_date, is_dead_since__gte=some_date)
    #
    #                 & (  Q(is_dead=False, created_on__lte=some_date)
    #                    | Q(is_dead=True,  created_on__lte=some_date, is_dead_since__gte=some_date)
    ugss = (
        UrlGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "url",
                filter=Q(
                    url__organization__is_dead=False,
                    url__organization__country=country,
                    url__organization__type_id=organization_type,
                    url__is_dead=False,
                    url__not_resolvable=False,
                ),
            )
        )
        .filter(n_urls=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    egss = (
        EndpointGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "endpoint",
                filter=Q(
                    endpoint__is_dead=False,
                    endpoint__url__not_resolvable=False,
                    endpoint__url__is_dead=False,
                    endpoint__url__organization__is_dead=False,
                    endpoint__url__organization__country=country,
                    endpoint__url__organization__type_id=organization_type,
                ),
            )
        )
        .filter(n_urls__gte=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    explains = []

    for scan in ugss:
        explains.append(get_explanation("url", scan))

    for scan in egss:
        explains.append(get_explanation("endpoint", scan))

    # sorting
    explains = sorted(explains, key=lambda k: (k["explained_on"]), reverse=True)

    return explains