Example 1
def get_lastest_map_health_data(country: str = "NL", organization_type: str = "municipality") -> Dict[str, Any]:
    map_configuration = (
        Configuration.objects.all()
        .filter(country=get_country(country), organization_type=get_organization_type(organization_type))
        .first()
    )

    if not map_configuration:
        return {
            "outdate_period_in_hours": -1,
            "percentage_up_to_date": -1,
            "percentage_out_of_date": -1,
            "amount_up_to_date": -1,
            "amount_out_of_date": -1,
            "per_scan": [],
        }
    try:
        mh = MapHealthReport.objects.all().filter(map_configuration=map_configuration).latest("at_when")
    except MapHealthReport.DoesNotExist:
        return {
            "outdate_period_in_hours": -1,
            "percentage_up_to_date": -1,
            "percentage_out_of_date": -1,
            "amount_up_to_date": -1,
            "amount_out_of_date": -1,
            "per_scan": [],
        }

    return mh.detailed_report
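A minimal refactor sketch (an assumption, not part of the original module): both fallback branches above return the same dictionary, so it could come from a single helper to keep them in sync.

def empty_map_health_report() -> Dict[str, Any]:
    # Hypothetical helper mirroring the placeholder report returned above when no
    # Configuration or MapHealthReport exists.
    return {
        "outdate_period_in_hours": -1,
        "percentage_up_to_date": -1,
        "percentage_out_of_date": -1,
        "amount_up_to_date": -1,
        "amount_out_of_date": -1,
        "per_scan": [],
    }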
Example 2
def get_cached_map_data(country: str = "NL",
                        organization_type: str = "municipality",
                        days_back: int = 0,
                        filters: List[str] = None):
    """
    Caching is split up into two queries. This is done on purpose, as MySQL cannot deal with text fields efficiently.

    MySQL will be very slow when a query has filtering conditions and a TextField is part of the result, even if
    the text field itself is not filtered on directly.

    To give an impression: this query used to take 7 seconds with just 20.000 records in the database and 760 in
    the corresponding key. After splitting it up into two queries, the total of both is <0.5 seconds.
    """

    # prevent mutable default
    if not filters:
        filters = ["all"]

    cached = (MapDataCache.objects.all().filter(
        country=country,
        organization_type=get_organization_type(organization_type),
        at_when=datetime.now(pytz.utc) - relativedelta(days=int(days_back)),
        filters=filters,
    ).defer("dataset").first())

    if not cached:
        return False

    my_dataset = MapDataCache.objects.only("id", "dataset").get(id=cached.id)
    if not my_dataset:
        return False

    return my_dataset.dataset
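A short usage sketch (values are illustrative): the function returns the cached dataset, or False when no cache row matches that exact country, layer, date and filter combination.

cached = get_cached_map_data("NL", "municipality", days_back=0, filters=["all"])
if cached is False:
    # no cache entry for this moment/filter; the caller (e.g. get_map_data, shown in a
    # later example) falls back to building the dataset with the full map query.
    pass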
Example 3
def get_stats_outdated(country, organization_type, weeks_back):
    """
    Stats are calculated using websecmap calculate_high_level_statistics

    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """

    timeframes = {
        "now": 0,
        "7 days ago": 0,
        "2 weeks ago": 0,
        "3 weeks ago": 0,
        "1 months ago": 0,
        "2 months ago": 0,
        "3 months ago": 0,
    }

    reports = {}
    for stat in timeframes:
        when = stats_determine_when(stat, weeks_back).date()

        # seven queries, but __never__ a missing result.
        stats = (HighLevelStatistic.objects.all().filter(
            country=country,
            organization_type=get_organization_type(organization_type),
            at_when__lte=when).order_by("-at_when").first())

        # no stats before a certain date, or database empty.
        if stats:
            reports[stat] = stats.report

    return reports
Example 4
def get_top_fail_data(country: str = "NL", organization_type="municipality", weeks_back=0):

    when = get_when(weeks_back)
    cursor = connection.cursor()

    sql = """
            SELECT
                low,
                organization.name,
                organizations_organizationtype.name,
                organization.id,
                at_when,
                organization.twitter_handle,
                high,
                medium,
                low,
                total_urls,
                total_endpoints
            FROM map_organizationreport
            INNER JOIN
              organization on organization.id = map_organizationreport.organization_id
            INNER JOIN
              organizations_organizationtype on organizations_organizationtype.id = organization.type_id
            INNER JOIN
              coordinate ON coordinate.organization_id = organization.id
            INNER JOIN
              (
                SELECT MAX(or2.id) as id2 FROM map_organizationreport or2
                INNER JOIN organization as org2 ON org2.id = or2.organization_id
                WHERE at_when <= '%(when)s'
                  AND org2.country = '%(country)s'
                  AND org2.type_id = '%(OrganizationTypeId)s'
                GROUP BY organization_id
              ) as stacked_organization_report
            ON stacked_organization_report.id2 = map_organizationreport.id
            WHERE
              (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
               AND organization.is_dead = 1
               ) OR (
               organization.created_on <= '%(when)s'
               AND organization.is_dead = 0
              ))
              AND organization.type_id = '%(OrganizationTypeId)s'
              AND organization.country = '%(country)s'
              AND total_urls > 0
            GROUP BY organization.name
            HAVING high > 0 or medium > 0
            ORDER BY high DESC, medium DESC, organization.name ASC
            """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    return rows_to_dataset(rows, when)
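The MAX(or2.id) subquery ("stacked_organization_report") selects the latest report per organization at the given moment. A rough ORM equivalent of just that part, as a sketch only and assuming the OrganizationReport model used in a later example; the raw SQL above remains the code that is actually used.

from django.db.models import OuterRef, Subquery

# Greatest-per-group: the newest report id per organization at 'when'.
latest_report_ids = (
    OrganizationReport.objects.filter(organization=OuterRef("organization"), at_when__lte=when)
    .order_by("-id")
    .values("id")[:1]
)
latest_reports = OrganizationReport.objects.filter(at_when__lte=when, id=Subquery(latest_report_ids))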
Example 5
def get_organization_report_by_name(
    country: str = DEFAULT_COUNTRY, organization_type=DEFAULT_LAYER, organization_name=None, weeks_back=0
):

    organization = Organization.objects.filter(
        computed_name_slug=slugify(organization_name),
        country=get_country(country),
        type=get_organization_type(organization_type),
        is_dead=False,
    ).first()

    log.debug("- %s %s %s " % (organization_name, get_country(country), get_organization_type(organization_type)))
    log.debug(organization)

    if not organization:
        return {}

    return get_organization_report_by_id(organization.pk, weeks_back)
Example 6
def export_coordinates(country: str = "NL", organization_type="municipality"):
    organizations = Organization.objects.all().filter(
        country=get_country(country),
        type=get_organization_type(organization_type))

    columns = ["id", "organization", "geojsontype", "area"]
    query = (Coordinate.objects.all().filter(
        organization__in=list(organizations),
        is_dead=False).values_list("id", "organization", "geojsontype",
                                   "area"))

    return generic_export(query, columns)
Example 7
def export_urls(country: str = "NL", organization_type="municipality"):
    organizations = Organization.objects.all().filter(
        country=get_country(country),
        type=get_organization_type(organization_type))

    columns = ["id", "url", "organization"]

    query = (Url.objects.all().filter(organization__in=list(organizations),
                                      is_dead=False,
                                      not_resolvable=False).values_list(
                                          "id", "url", "organization"))

    return generic_export(query, columns)
Example 8
def get_2ndlevel_domains(country, layer):
    """
    This is a starting point for SIDN to upload information for domains: basically a list of all
    2nd level domains for a layer, for which the data is then uploaded.
    """
    urls = (Url.objects.all().filter(
        Q(computed_subdomain__isnull=True) | Q(computed_subdomain=""),
        organization__country=get_country(country),
        organization__type=get_organization_type(layer),
    ).values_list("url", flat=True))

    urls = list(set(urls))

    return urls
Example 9
def get_stats(country, organization_type, weeks_back):
    """
    Stats are calculated using websecmap calculate_high_level_statistics

    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """

    when = datetime.now(pytz.utc) - relativedelta(days=int(weeks_back * 7))

    # a single query for up to a year of statistics, __never__ a missing result.
    stats = (HighLevelStatistic.objects.all().filter(
        country=country,
        organization_type=get_organization_type(organization_type),
        at_when__lte=when).order_by("-at_when")[0:366])

    reports = {
        "organizations": [],
        "urls": [],
        "explained": {},
        "endpoints_now": 0,
        "endpoint": []
    }

    for stat in stats:
        r = stat.report
        reports["organizations"].append({
            "high": r["high"],
            "medium": r["medium"],
            "good": r["good"],
            "date": stat.at_when.isoformat()
        })
        reports["urls"].append({
            "high": r["high_urls"],
            "medium": r["medium_urls"],
            "good": r["good_urls"],
            "date": stat.at_when.isoformat(),
        })

    first = stats.first()
    if first:
        r = first.report
        reports["endpoint"] = r["endpoint"]
        reports["explained"] = r["explained"]
        reports["endpoints_now"] = r["endpoints"]

    return reports
Example 10
def export_urls_only(country: str = "NL", organization_type="municipality"):
    # todo: reuse the column definition as both the export headers and the values_list fields
    columns = [
        "id", "url", "not_resolvable", "is_dead", "computed_subdomain",
        "computed_domain", "computed_suffix"
    ]
    queryset = (Url.objects.all().filter(
        is_dead=False,
        not_resolvable=False,
        organization__is_dead=False,
        organization__country=get_country(country),
        organization__type=get_organization_type(organization_type),
    ).values_list("id", "url", "not_resolvable", "is_dead",
                  "computed_subdomain", "computed_domain", "computed_suffix"))
    return generic_export(queryset, columns)
Example 11
def get_vulnerability_graph(country, organization_type, weeks_back):

    organization_type_id = get_organization_type(organization_type)
    country = get_country(country)
    when = timezone.now() - relativedelta(weeks=int(weeks_back))

    one_year_ago = when - timedelta(days=365)

    data = (VulnerabilityStatistic.objects.all().filter(
        organization_type=organization_type_id,
        country=country,
        at_when__lte=when,
        at_when__gte=one_year_ago).order_by("scan_type", "at_when"))
    """
    Desired output:
      "security_headers_x_frame_options": [
        {
          "date": "2018-07-17",
          "high": 0,
          "medium": 3950,
          "low": 0
        },
        {
          "date": "2018-07-24",
          "high": 0,
          "medium": 2940,
          "low": 0
        },
    """
    stats = {}

    for statistic in data:
        if statistic.scan_type not in stats:
            stats[statistic.scan_type] = []

        stats[statistic.scan_type].append({
            "high": statistic.high,
            "medium": statistic.medium,
            "low": statistic.low,
            "date": statistic.at_when.isoformat(),
            "urls": statistic.urls,
            "ok_urls": statistic.ok_urls,
            "endpoints": statistic.endpoints,
            "ok_endpoints": statistic.ok_endpoints,
            "ok": statistic.ok,
        })

    return stats
Example 12
def what_to_improve(country: str, organization_type: str, issue_type: str):
    # todo: check if the layer is published.

    policy = POLICY.get(issue_type, None)
    if not policy:
        log.debug(f"No policy found for {issue_type}")
        return []

    country = get_country(country)
    organization_type = get_organization_type(organization_type)

    if issue_type in URL_SCAN_TYPES:
        return what_to_improve_ugs(country, organization_type, issue_type,
                                   policy)
    else:
        return what_to_improve_epgs(country, organization_type, issue_type,
                                    policy)
Example 13
def get_organization_vulnerability_timeline_via_name(
        organization_name: str,
        organization_type: str = "",
        country: str = ""):

    layer = get_organization_type(
        organization_type) if organization_type else get_default_layer()
    country = get_country(code=country) if country else get_default_country()

    organization = (Organization.objects.all().filter(country=country,
                                                      type=layer,
                                                      name=organization_name,
                                                      is_dead=False).first())

    if not organization:
        return {}

    return get_organization_vulnerability_timeline(organization.id)
Example 14
def export_organizations(country: str = "NL",
                         organization_type="municipality"):
    columns = ["id", "name", "type", "wikidata", "wikipedia", "twitter_handle"]
    query = (Organization.objects.all().filter(
        country=get_country(country),
        type=get_organization_type(organization_type),
        is_dead=False).values_list("id", "name", "type", "wikidata",
                                   "wikipedia", "twitter_handle"))
    # values() doesn't work anymore to determine which columns are relevant for the export;
    # we would need to set it on the factory.

    # That does not restrict the number of fields: all fields are included.
    # The export documentation is pretty terrible (there is none) and the code is obscure, so we're not using it.
    # Left here for reference, in case anyone decides that
    # from import_export.resources import modelresource_factory
    # is a wise choice to use. It's not / it hardly is.
    # exporter = modelresource_factory(query.model)
    # exporter.Meta.fields = ['id', 'name', 'type', 'wikidata', 'wikipedia', 'twitter_handle']
    # dataset = exporter().export(query)

    return generic_export(query, columns)
Example 15
def get_top_win_data(country: str = "NL", organization_type="municipality", weeks_back=0):
    """
    Can't use Django's objects.raw() syntax here: it raises OperationalError: near "%": syntax error...
    It's probably not supported, even though the manual says it is. Maybe because we use a multiline string?

    https://code.djangoproject.com/ticket/10070
    Dictionary params are not supported with the SQLite backend; with this backend, you must pass parameters as a
    list (see the sketch after this function).

    When doing it "right", the next exception appears: format requires a mapping. So instead we rely on the checks
    we do on the input values.
    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """
    when = get_when(weeks_back)

    cursor = connection.cursor()
    sql = """
            SELECT
              low,
              organization.name,
              organizations_organizationtype.name,
              organization.id,
              at_when,
              organization.twitter_handle,
              high,
              medium,
              low,
              total_urls,
              total_endpoints,
              organization.is_dead
            FROM map_organizationreport
            INNER JOIN
              organization on organization.id = map_organizationreport.organization_id
            INNER JOIN
              organizations_organizationtype on organizations_organizationtype.id = organization.type_id
            INNER JOIN
              coordinate ON coordinate.organization_id = organization.id
            INNER JOIN
              (
                SELECT MAX(or2.id) as id2 FROM map_organizationreport or2
                INNER JOIN organization as org2 ON org2.id = or2.organization_id
                WHERE at_when <= '%(when)s'
                  AND org2.country = '%(country)s'
                  AND org2.type_id = '%(OrganizationTypeId)s'
                GROUP BY organization_id
              ) as stacked_organization_report
            ON stacked_organization_report.id2 = map_organizationreport.id
            WHERE
              (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
               AND organization.is_dead = 1
               ) OR (
               organization.created_on <= '%(when)s'
               AND organization.is_dead = 0
              ))
              AND organization.type_id = '%(OrganizationTypeId)s'
              AND organization.country = '%(country)s'
              AND total_urls > 0
            GROUP BY organization.name
            HAVING high = 0 AND medium = 0
            ORDER BY low ASC, total_endpoints DESC, organization.name ASC
            """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    return rows_to_dataset(rows, when)
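As referenced in the docstring: a minimal sketch (a simplified query, not the one above) of the list-based parameter style Django's cursor does accept on both MySQL and SQLite, which avoids interpolating values into the SQL string.

from django.db import connection

with connection.cursor() as cursor:
    # %s placeholders plus a list of parameters; Django adapts these per database backend.
    cursor.execute(
        "SELECT or2.id, or2.high, or2.medium, or2.low "
        "FROM map_organizationreport or2 "
        "INNER JOIN organization org2 ON org2.id = or2.organization_id "
        "WHERE or2.at_when <= %s AND org2.country = %s AND org2.type_id = %s",
        [when, get_country(country), get_organization_type(organization_type)],
    )
    rows = cursor.fetchall()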
Example 16
def get_map_data(country: str = "NL",
                 organization_type: str = "municipality",
                 days_back: int = 0,
                 displayed_issue: str = None):

    # A bug in the live version (possibly due to |safe) gives a Country(code='NL') object instead of
    # the country code itself. It is worked around here, but something is wrong and should be fixed.
    if hasattr(country, "code"):
        country = country.code

    when = datetime.now(pytz.utc) - relativedelta(days=int(days_back))

    desired_url_scans = []
    desired_endpoint_scans = []

    if displayed_issue in URL_SCAN_TYPES:
        desired_url_scans += [displayed_issue]

    if displayed_issue in ENDPOINT_SCAN_TYPES:
        desired_endpoint_scans += [displayed_issue]

    # fall back to "all" (the default) when no specific issue is requested.
    if not desired_url_scans and not desired_endpoint_scans:
        desired_url_scans = URL_SCAN_TYPES
        desired_endpoint_scans = ENDPOINT_SCAN_TYPES

        filters = ["all"]
    else:
        filters = desired_url_scans + desired_endpoint_scans

    cached = get_cached_map_data(country, organization_type, days_back,
                                 filters)

    if cached:
        return cached

    # Returns a json structure containing all current map data.
    # This is used by the client to render the map.
    # Renditions of this dataset might be pushed to gitlab automatically.

    data = {
        "metadata": {
            "type": "FeatureCollection",
            "render_date": datetime.now(pytz.utc).isoformat(),
            "data_from_time": when.isoformat(),
            "remark": remark,
            "applied filter": displayed_issue,
            "layer": organization_type,
            "country": country,
        },
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
            }
        },
        "features": [],
    }

    cursor = connection.cursor()

    # Sept 2019: MySQL has an issue with mediumtext fields. When they are joined and the query is not optimized, the
    # result takes 2 minutes to complete. If you do not select the mediumtext field, the query finishes in a second.
    # That is why two queries are used to retrieve map data from the database.
    sql = """
        SELECT
            map_organizationreport.low,
            organization.name,
            organizations_organizationtype.name,
            coordinate_stack.area,
            coordinate_stack.geoJsonType,
            organization.id,
            map_organizationreport.id as organization_report_id,
            map_organizationreport.high,
            map_organizationreport.medium,
            map_organizationreport.low,
            map_organizationreport.total_issues,
            map_organizationreport.total_urls,
            map_organizationreport.high_urls,
            map_organizationreport.medium_urls,
            map_organizationreport.low_urls,
            coordinate_stack.stacked_coordinate_id
        FROM map_organizationreport
        INNER JOIN


          (SELECT stacked_organization.id as stacked_organization_id
          FROM organization stacked_organization
          WHERE (
            stacked_organization.created_on <= '%(when)s'
            AND stacked_organization.is_dead = 0
            AND stacked_organization.type_id=%(OrganizationTypeId)s
            AND stacked_organization.country='%(country)s'
            )
          OR (
          '%(when)s' BETWEEN stacked_organization.created_on AND stacked_organization.is_dead_since
            AND stacked_organization.is_dead = 1
            AND stacked_organization.type_id=%(OrganizationTypeId)s
            AND stacked_organization.country='%(country)s'
          )) as organization_stack
          ON organization_stack.stacked_organization_id = map_organizationreport.organization_id


        INNER JOIN
          organization on organization.id = stacked_organization_id
        INNER JOIN
          organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN


          (SELECT MAX(stacked_coordinate.id) as stacked_coordinate_id, area, geoJsonType, organization_id
          FROM coordinate stacked_coordinate
          INNER JOIN organization filter_organization
            ON (stacked_coordinate.organization_id = filter_organization.id)
          WHERE (
            stacked_coordinate.created_on <= '%(when)s'
            AND stacked_coordinate.is_dead = 0
            AND filter_organization.country='%(country)s'
            AND filter_organization.type_id=%(OrganizationTypeId)s
            )
          OR
            ('%(when)s' BETWEEN stacked_coordinate.created_on AND stacked_coordinate.is_dead_since
            AND stacked_coordinate.is_dead = 1
            AND filter_organization.country='%(country)s'
            AND filter_organization.type_id=%(OrganizationTypeId)s
            ) GROUP BY area, organization_id
          ) as coordinate_stack
          ON coordinate_stack.organization_id = map_organizationreport.organization_id


        INNER JOIN


          (SELECT MAX(map_organizationreport.id) as stacked_organizationrating_id
          FROM map_organizationreport
          INNER JOIN organization filter_organization2
            ON (filter_organization2.id = map_organizationreport.organization_id)
          WHERE at_when <= '%(when)s'
          AND filter_organization2.country='%(country)s'
          AND filter_organization2.type_id=%(OrganizationTypeId)s
          GROUP BY organization_id
          ) as stacked_organizationrating
          ON stacked_organizationrating.stacked_organizationrating_id = map_organizationreport.id


        WHERE organization.type_id = '%(OrganizationTypeId)s' AND organization.country= '%(country)s'
        GROUP BY coordinate_stack.area, organization.name
        ORDER BY map_organizationreport.at_when ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    cursor.execute(sql)
    rows = cursor.fetchall()

    needed_reports = []
    for i in rows:
        # cast to str to prevent: "sequence item 0: expected str instance, int found"
        needed_reports.append(str(i[6]))

    reports = get_reports_by_ids(needed_reports)

    # todo: http://www.gadzmo.com/python/using-pythons-dictcursor-in-mysql-to-return-a-dict-with-keys/
    # Unfortunately numbered results are used here. There is no decent cross-database solution for sqlite, and the
    # column-to-dict translation is somewhat hairy. A raw query would probably be better if possible.
    # A sketch of such a column-to-dict helper follows after this function.

    for i in rows:

        # Here we're going to do something stupid: to rebuild the high, medium, low classification based on scan_types
        # It's somewhat insane to do it like this, but it's also insane to keep adding columns for each vulnerability
        # that's added to the system. This solution will be a bit slow, but given the caching and such it wouldn't
        # hurt too much.
        # Also: we've optimized for calculation in the past, but we're not even using it until now. So that part of
        # this code is pretty optimized :)
        # This feature is created to give an instant overview of what issues are where. This will lead more clicks to
        # reports.
        # The caching of this url should be decent, as people want to click fast. Filtering on the client
        # would be possible using the calculation field. Perhaps that should be the way. Yet then we have to do
        # filtering with javascript, which is error prone (todo: this will be done in the future, as it responds faster
        # but it will also mean an enormous increase of data sent to the client.)
        # It's actually reasonably fast.
        high, medium, low, ok = 0, 0, 0, 0

        calculation = json.loads(reports[i[6]])

        for url in calculation["organization"]["urls"]:
            for url_rating in url["ratings"]:
                if (url_rating["type"] in desired_url_scans
                        and url_rating.get("comply_or_explain_valid_at_time_of_report", False) is False):
                    high += url_rating["high"]
                    medium += url_rating["medium"]
                    low += url_rating["low"]
                    ok += url_rating["ok"]

            # it's possible the url doesn't have ratings.
            for endpoint in url["endpoints"]:
                for endpoint_rating in endpoint["ratings"]:
                    if (endpoint_rating["type"] in desired_endpoint_scans
                            and endpoint_rating.get("comply_or_explain_valid_at_time_of_report", False) is False):
                        high += endpoint_rating["high"]
                        medium += endpoint_rating["medium"]
                        low += endpoint_rating["low"]
                        ok += endpoint_rating["ok"]

        # figure out if red, orange or green:
        # #162, only make things red if there is a critical issue.
        # removed json parsing of the calculation. This saves time.
        # no contents, no endpoint ever mentioned in any url (which is a standard attribute)
        if "total_urls" not in calculation["organization"] or not calculation["organization"]["total_urls"]:
            severity = "unknown"
        else:
            # things have to be OK in order to be colored. If it's all empty... then it's not OK.
            severity = "high" if high else "medium" if medium else "low" if low else "good" if ok else "unknown"

        dataset = {
            "type": "Feature",
            "properties": {
                "organization_id": i[5],
                "organization_type": i[2],
                "organization_name": i[1],
                "organization_name_lowercase": i[1].lower(),
                "organization_slug": slugify(i[1]),
                "additional_keywords": extract_domains(calculation),
                "high": high,
                "medium": medium,
                "low": low,
                "data_from": when.isoformat(),
                "severity": severity,
                "total_urls": i[11],  # = 100%
                "high_urls": i[12],
                "medium_urls": i[13],
                "low_urls": i[14],
            },
            "geometry": {
                # the coordinate ID makes it easy to check if the geometry has changed shape/location.
                "coordinate_id": i[15],
                "type": i[4],
                # Sometimes the data is a string, sometimes it's a list. The admin
                # interface might influence this. The fastest would be to use a string, instead of
                # loading some json.
                "coordinates": proper_coordinate(i[3], i[4]),
            },
        }

        # calculate some statistics, so the frontends do not have to...
        # prevent division by zero
        if i[11]:
            total_urls = int(i[11])
            high_urls = int(i[12])
            medium_urls = int(i[13])
            low_urls = int(i[14])
            dataset["properties"]["percentages"] = {
                "high_urls": round(high_urls / total_urls, 2) * 100,
                "medium_urls": round(medium_urls / total_urls, 2) * 100,
                "low_urls": round(low_urls / total_urls, 2) * 100,
                "good_urls": round((total_urls - (high_urls + medium_urls + low_urls)) / total_urls, 2) * 100,
            }
        else:
            dataset["properties"]["percentages"] = {
                "high_urls": 0,
                "medium_urls": 0,
                "low_urls": 0,
                "good_urls": 0,
            }

        data["features"].append(dataset)

    return data
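As mentioned in the DictCursor note inside the function: a small, hypothetical helper that turns DB-API rows into dicts via cursor.description instead of numeric indexes. It works on MySQL and SQLite alike; duplicate column names (such as the two "name" columns selected above) would need SQL aliases first, otherwise later keys overwrite earlier ones.

def rows_as_dicts(cursor):
    # cursor.description holds one (name, ...) tuple per selected column.
    columns = [column[0] for column in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]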
Example 17
def get_improvements(country, organization_type, weeks_back, weeks_duration):
    # todo: adjustable timespan
    # todo: adjustable weeks_back

    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)

    if not weeks_duration:
        weeks_duration = 1

    when = get_when(weeks_back)

    # looks a lot like graphs, but then just subtract/add some values and done (?)

    # compare the first urlrating to the last urlrating
    # but do not include urls that don't exist.

    sql = """
        SELECT
            reporting_urlreport.id as id,
            calculation
        FROM
            reporting_urlreport
        INNER JOIN
            (
                SELECT MAX(id) as id2 FROM reporting_urlreport or2
                WHERE at_when <= '%(when)s' GROUP BY url_id
            ) as x
        ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
            """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    newest_urlratings = UrlReport.objects.raw(sql)

    # this of course doesn't work on the first day, as then we hadn't measured
    # everything yet (and the ratings for several issues are 0).
    sql = """
        SELECT reporting_urlreport.id as id, calculation FROM
            reporting_urlreport
        INNER JOIN
            (
                SELECT MAX(id) as id2 FROM reporting_urlreport or2
                WHERE at_when <= '%(when)s' GROUP BY url_id
            ) as x
        ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
            """ % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    oldest_urlratings = UrlReport.objects.raw(sql)

    old_measurement = {}
    new_measurement = {}
    scan_types = []

    # stats for the newest; this should be made a function (a sketch of such a helper follows after this function):
    for urlrating in newest_urlratings:

        # url level; why are there reports without url ratings / with empty url ratings?
        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in new_measurement:
                    new_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                new_measurement[rating["type"]]["high"] += rating["high"]
                new_measurement[rating["type"]]["medium"] += rating["medium"]
                new_measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in new_measurement:
                    new_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                new_measurement[rating["type"]]["high"] += rating["high"]
                new_measurement[rating["type"]]["medium"] += rating["medium"]
                new_measurement[rating["type"]]["low"] += rating["low"]

    # and the oldest stats, which should be the same function
    for urlrating in oldest_urlratings:

        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in old_measurement:
                    old_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                old_measurement[rating["type"]]["high"] += rating["high"]
                old_measurement[rating["type"]]["medium"] += rating["medium"]
                old_measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in old_measurement:
                    old_measurement[rating["type"]] = {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }

                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])

                old_measurement[rating["type"]]["high"] += rating["high"]
                old_measurement[rating["type"]]["medium"] += rating["medium"]
                old_measurement[rating["type"]]["low"] += rating["low"]

    # and now do some magic to see the changes in this timespan:
    changes = {}
    for scan_type in scan_types:
        if scan_type not in changes:
            changes[scan_type] = {}

        if scan_type not in old_measurement:
            old_measurement[scan_type] = {}

        if scan_type not in new_measurement:
            new_measurement[scan_type] = {}

        changes[scan_type] = {
            "old": {
                "date": timezone.now() - timedelta(days=(weeks_duration * 7)),
                "high": old_measurement[scan_type].get("high", 0),
                "medium": old_measurement[scan_type].get("medium", 0),
                "low": old_measurement[scan_type].get("low", 0),
            },
            "new": {
                "date": when,
                "high": new_measurement[scan_type].get("high", 0),
                "medium": new_measurement[scan_type].get("medium", 0),
                "low": new_measurement[scan_type].get("low", 0),
            },
            "improvements": {
                "high":
                old_measurement[scan_type].get("high", 0) -
                new_measurement[scan_type].get("high", 0),
                "medium":
                old_measurement[scan_type].get("medium", 0) -
                new_measurement[scan_type].get("medium", 0),
                "low":
                old_measurement[scan_type].get("low", 0) -
                new_measurement[scan_type].get("low", 0),
            },
        }

    # and now for overall changes, what everyone is coming for...
    for scan_type in scan_types:
        changes["overall"] = {
            "old": {
                "high":
                changes.get("overall", {}).get("old", {}).get("high", 0) +
                changes[scan_type]["old"]["high"],
                "medium":
                changes.get("overall", {}).get("old", {}).get("medium", 0) +
                changes[scan_type]["old"]["medium"],
                "low":
                changes.get("overall", {}).get("old", {}).get("low", 0) +
                changes[scan_type]["old"]["low"],
            },
            "new": {
                "high":
                changes.get("overall", {}).get("new", {}).get("high", 0) +
                changes[scan_type]["new"]["high"],
                "medium":
                changes.get("overall", {}).get("new", {}).get("medium", 0) +
                changes[scan_type]["new"]["medium"],
                "low":
                changes.get("overall", {}).get("new", {}).get("low", 0) +
                changes[scan_type]["new"]["low"],
            },
            "improvements": {
                "high":
                changes.get("overall", {}).get("improvements", {}).get(
                    "high", 0) + changes[scan_type]["improvements"]["high"],
                "medium":
                changes.get("overall", {}).get("improvements", {}).get(
                    "medium", 0) +
                changes[scan_type]["improvements"]["medium"],
                "low":
                changes.get("overall", {}).get("improvements", {}).get(
                    "low", 0) + changes[scan_type]["improvements"]["low"],
            },
        }

    return changes
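A sketch of the shared helper hinted at in the comments above (an assumption, not part of the module): it sums high/medium/low per rating type over url-level and endpoint-level ratings, so the two near-identical loops collapse into two calls.

def tally_measurement(urlratings, scan_types):
    # Aggregate high/medium/low per rating type; scan_types is extended in place.
    measurement = {}
    for urlrating in urlratings:
        ratings = list(urlrating.calculation.get("ratings", []))
        for endpoint in urlrating.calculation.get("endpoints", []):
            ratings.extend(endpoint.get("ratings", []))
        for rating in ratings:
            bucket = measurement.setdefault(rating["type"], {"high": 0, "medium": 0, "low": 0})
            if rating["type"] not in scan_types:
                scan_types.append(rating["type"])
            bucket["high"] += rating["high"]
            bucket["medium"] += rating["medium"]
            bucket["low"] += rating["low"]
    return measurement

# new_measurement = tally_measurement(newest_urlratings, scan_types)
# old_measurement = tally_measurement(oldest_urlratings, scan_types)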
Example 18
def get_all_latest_scans(country, organization_type):

    dataset = {
        "scans": defaultdict(list),
        "render_date": timezone.now().isoformat(),
        "remark": remark,
    }

    filtered_organization_type = get_organization_type(organization_type)
    filtered_country = get_country(country)

    # Really get the latest, without double results that apply for multiple organizations.
    # Do not show anything that is dead, on any level.
    for scan_type in PUBLISHED_ENDPOINT_SCAN_TYPES:
        scans = (
            EndpointGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "endpoint",
                    filter=Q(
                        endpoint__is_dead=False,
                        endpoint__url__not_resolvable=False,
                        endpoint__url__is_dead=False,
                        endpoint__url__organization__is_dead=False,
                        endpoint__url__organization__country=filtered_country,
                        endpoint__url__organization__type_id=filtered_organization_type,
                    ),
                )
            )
            .filter(n_urls__gte=1)
            .order_by("-rating_determined_on")[0:6]
        )

        # log.debug(scans.query)

        for scan in scans:
            calculation = get_severity(scan)

            dataset["scans"][scan_type].append(
                {
                    "url": scan.endpoint.url.url,
                    "service": f"{scan.endpoint.protocol}/{scan.endpoint.port} (IPv{scan.endpoint.ip_version})",
                    "protocol": scan.endpoint.protocol,
                    "port": scan.endpoint.port,
                    "ip_version": scan.endpoint.ip_version,
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    for scan_type in PUBLISHED_URL_SCAN_TYPES:

        scans = (
            UrlGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "url",
                    filter=Q(
                        url__organization__is_dead=False,
                        url__organization__country=filtered_country,
                        url__organization__type_id=filtered_organization_type,
                        url__is_dead=False,
                        url__not_resolvable=False,
                    ),
                )
            )
            .filter(n_urls=1)
            .order_by("-rating_determined_on")[0:6]
        )

        for scan in scans:
            calculation = get_severity(scan)

            # url scans
            dataset["scans"][scan_type].append(
                {
                    "url": scan.url.url,
                    "service": f"{scan.url.url}",
                    "protocol": scan_type,
                    "port": "-",
                    "ip_version": "-",
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    return dataset
Example 19
def get_ticker_data(
    country: str = "NL", organization_type: str = "municipality", weeks_back: int = 0, weeks_duration: int = 0
):

    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)

    # Gives ticker data of organizations, like a scrolling news ticker:
    # On organization level; it could be on URL level in the future (selecting more interesting urls?).
    # Organizations are far more meaningful.
    # Amsterdam 42 +1, 32 +2, 12 -, Zutphen 12 -3, 32 -1, 3 +3, etc.

    if not weeks_duration:
        weeks_duration = 10

    when = get_when(weeks_back)

    # looks a lot like graphs, but then just subtract/add some values and done (?)

    # compare the first urlrating to the last urlrating
    # but do not include urls that don't exist.

    sql = """
        SELECT
            map_organizationreport.id as id,
            name,
            high,
            medium,
            low
        FROM
            map_organizationreport
        INNER JOIN
           (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as filter_organization
                ON (filter_organization.id = or2.organization_id)
                WHERE
                    at_when <= '%(when)s'
                    AND filter_organization.country='%(country)s'
                    AND filter_organization.type_id=%(OrganizationTypeId)s
               GROUP BY organization_id
            ) as stacked_organizationreport
        ON stacked_organizationreport.id2 = map_organizationreport.id
        INNER JOIN organization ON map_organizationreport.organization_id = organization.id
        WHERE
        (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
           AND organization.is_dead = 1
           ) OR (
           organization.created_on <= '%(when)s'
           AND organization.is_dead = 0
        ))
        AND organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        AND total_urls > 0
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    newest_urlratings = list(OrganizationReport.objects.raw(sql))

    # this of course doesn't work on the first day, as then we hadn't measured
    # everything yet (and the ratings for several issues are 0).
    sql = """
        SELECT
            map_organizationreport.id as id,
            name,
            high,
            medium,
            low
        FROM
               map_organizationreport
        INNER JOIN
            (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as filter_organization
                ON (filter_organization.id = or2.organization_id)
                WHERE
                    at_when <= '%(when)s'
                    AND filter_organization.country='%(country)s'
                    AND filter_organization.type_id=%(OrganizationTypeId)s
               GROUP BY organization_id
            ) as stacked_organizationreport
        ON stacked_organizationreport.id2 = map_organizationreport.id
        INNER JOIN organization ON map_organizationreport.organization_id = organization.id
        WHERE
        (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
               AND organization.is_dead = 1
               ) OR (
               organization.created_on <= '%(when)s'
               AND organization.is_dead = 0
        ))
        AND organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        AND total_urls > 0
        """ % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    oldest_urlratings = list(OrganizationReport.objects.raw(sql))

    # create a dict where the keys point to the ratings. This makes it easy to match the correct ones
    # and to handle missing oldest ratings, for example.
    oldest_urlratings_dict = {}
    for oldest_urlrating in oldest_urlratings:
        oldest_urlratings_dict[oldest_urlrating.name] = oldest_urlrating

    # unsuccessful rebuild? Or not enough organizations?
    if not newest_urlratings:
        return {"changes": {}, "slogan": config.TICKER_SLOGAN}

    changes = []
    for newest_urlrating in newest_urlratings:

        try:
            matching_oldest = oldest_urlratings_dict[newest_urlrating.name]
        except KeyError:
            matching_oldest = None

        if not matching_oldest:
            high_then = medium_then = low_then = "-"
            high_changes = newest_urlrating.high
            medium_changes = newest_urlrating.medium
            low_changes = newest_urlrating.low

        else:
            high_then = matching_oldest.high
            medium_then = matching_oldest.medium
            low_then = matching_oldest.low
            high_changes = newest_urlrating.high - matching_oldest.high
            medium_changes = newest_urlrating.medium - matching_oldest.medium
            low_changes = newest_urlrating.low - matching_oldest.low

        change = {
            "organization": newest_urlrating.name,
            "high_now": newest_urlrating.high,
            "medium_now": newest_urlrating.medium,
            "low_now": newest_urlrating.low,
            "high_then": high_then,
            "medium_then": medium_then,
            "low_then": low_then,
            "high_changes": high_changes,
            "medium_changes": medium_changes,
            "low_changes": int(low_changes),
        }

        changes.append(change)

    data = {"changes": changes, "slogan": config.TICKER_SLOGAN}

    return data
Example 20
def get_all_explains(country, organization_type, limit=0):

    limit = 999999 if limit == 0 else limit

    country = get_country(country)
    organization_type = get_organization_type(organization_type)

    # some urls are used by a lot of organizations; we only need the distinct explanation, and
    # the organizations will be matched later.
    # https://stackoverflow.com/questions/30752268/how-to-filter-objects-for-count-annotation-in-django

    # we're currently not taking into account which endpoints, urls and organizations are alive at a certain point.
    # In that sense we're not taking history into account: only what is relevant _NOW_.
    # This should change, by adding some complex Q conditions for checking a certain date.
    # something like: & (  Q(url__organization__is_dead=False, created_on__lte=some_date)
    #                    | Q(is_dead=True,  created_on__lte=some_date, is_dead_since__gte=some_date)
    #
    #                 & (  Q(is_dead=False, created_on__lte=some_date)
    #                    | Q(is_dead=True,  created_on__lte=some_date, is_dead_since__gte=some_date)
    ugss = (
        UrlGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "url",
                filter=Q(
                    url__organization__is_dead=False,
                    url__organization__country=country,
                    url__organization__type_id=organization_type,
                    url__is_dead=False,
                    url__not_resolvable=False,
                ),
            )
        )
        .filter(n_urls=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    egss = (
        EndpointGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "endpoint",
                filter=Q(
                    endpoint__is_dead=False,
                    endpoint__url__not_resolvable=False,
                    endpoint__url__is_dead=False,
                    endpoint__url__organization__is_dead=False,
                    endpoint__url__organization__country=country,
                    endpoint__url__organization__type_id=organization_type,
                ),
            )
        )
        .filter(n_urls__gte=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    explains = []

    for scan in ugss:
        explains.append(get_explanation("url", scan))

    for scan in egss:
        explains.append(get_explanation("endpoint", scan))

    # sorting
    explains = sorted(explains, key=lambda k: (k["explained_on"]), reverse=True)

    return explains