Example #1
def api_private_website_stats() -> Response:
    """TODO
    ---
    responses:
      '200':
        description: TODO
    """
    # Historical note on uses_pg_index counters_day_cc_asn_input_idx:
    # a BRIN index was not used at all, but a BTREE on
    # (measurement_start_day, probe_cc, probe_asn, input)
    # took queries from a full scan down to 50ms
    url = request.args.get("input")
    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()

    s = """SELECT
        toDate(measurement_start_time) AS test_day,
        countIf(anomaly = 't') as anomaly_count,
        countIf(confirmed = 't') as confirmed_count,
        countIf(msm_failure = 't') as failure_count,
        COUNT() AS total_count
        FROM fastpath
        WHERE measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND probe_cc = :probe_cc
        AND probe_asn = :probe_asn
        AND input = :input
        GROUP BY test_day ORDER BY test_day
    """
    d = {"probe_cc": probe_cc, "probe_asn": probe_asn, "input": url}
    results = query_click(sql.text(s), d)
    return cachedjson("0s", results=results)
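All of these examples funnel queries through a query_click helper that is not shown here. A minimal sketch of what such a helper might look like, assuming the clickhouse-driver package (the Client setup and %(name)s placeholder style below are that driver's conventions, not this module's actual code):

# Hypothetical sketch of a query_click-style helper; the real one would also
# render SQLAlchemy :name placeholders into the %(name)s form used below
from clickhouse_driver import Client

client = Client(host="localhost")

def query_click_sketch(query: str, params: dict) -> list:
    # clickhouse-driver substitutes %(name)s placeholders from params,
    # so values are never hand-interpolated into the SQL string
    rows, coldata = client.execute(query, params, with_column_types=True)
    colnames = [name for name, _type in coldata]
    return [dict(zip(colnames, row)) for row in rows]

# Usage with a made-up country code:
rows = query_click_sketch(
    "SELECT COUNT() AS cnt FROM fastpath WHERE probe_cc = %(probe_cc)s",
    {"probe_cc": "IT"},
)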
Example #2
def api_private_circumvention_runtime_stats() -> Response:
    """Runtime statistics on protocols used for circumvention,
    grouped by date, country, test_name.
    ---
    responses:
      200:
        description: Runtime quantiles and counts grouped by date, country and test_name
    """
    q = """SELECT
        toDate(measurement_start_time) AS date,
        test_name,
        probe_cc,
        quantile(.5)(JSONExtractFloat(scores, 'extra', 'test_runtime')) AS p50,
        quantile(.9)(JSONExtractFloat(scores, 'extra', 'test_runtime')) AS p90,
        count() as cnt
    FROM fastpath
    WHERE test_name IN ['torsf', 'tor', 'stunreachability', 'psiphon','riseupvpn']
    AND measurement_start_time > today() - interval 6 month
    AND measurement_start_time < today() - interval 1 day
    AND JSONHas(scores, 'extra', 'test_runtime')
    GROUP BY date, probe_cc, test_name
    """
    try:
        r = query_click(sql.text(q), {})
        result = pivot_circumvention_runtime_stats(r)
        return cachedjson("1d", v=0, results=result)

    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})
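pivot_circumvention_runtime_stats is defined elsewhere. A plausible sketch, assuming it regroups the flat rows into one time series per (test_name, probe_cc) pair; the real helper may use a different output shape:

# Hypothetical reconstruction for illustration only
from collections import defaultdict
from typing import Any, Dict, List, Tuple

def pivot_circumvention_runtime_stats_sketch(rows) -> dict:
    out: Dict[Tuple[str, str], List[Dict[str, Any]]] = defaultdict(list)
    for r in rows:
        k = (r["test_name"], r["probe_cc"])
        out[k].append(
            {"date": r["date"], "p50": r["p50"], "p90": r["p90"], "cnt": r["cnt"]}
        )
    # JSON object keys must be strings
    return {f"{tn}:{cc}": v for (tn, cc), v in out.items()}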
Example #3
def fetch_reactive_url_list(cc: str, probe_asn: int):
    """Select all citizenlab URLs for the given probe_cc + ZZ
    Select measurements count from the current and previous week
    using a left outer join (without any info about priority)"""
    q = """
SELECT category_code, domain, url, cc, COALESCE(msmt_cnt, 0) AS msmt_cnt
FROM (
    SELECT domain, url, cc, category_code
    FROM citizenlab
    WHERE
      citizenlab.cc = :cc_low
      OR citizenlab.cc = :cc
      OR citizenlab.cc = 'ZZ'
) AS citiz
LEFT OUTER JOIN (
    SELECT input, SUM(msmt_cnt) AS msmt_cnt
    FROM counters_asn_test_list
    WHERE probe_cc = :cc
    AND (week IN (toStartOfWeek(now()), toStartOfWeek(now() - interval 1 week)))
    --asn-filter--
    GROUP BY input
) AS cnt
ON (citiz.url = cnt.input)
"""
    if probe_asn != 0:
        q = q.replace("--asn-filter--", "AND probe_asn = :asn")

    # support uppercase or lowercase match
    r = query_click(sa.text(q), dict(cc=cc, cc_low=cc.lower(), asn=probe_asn))
    return tuple(r)
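The --asn-filter-- replacement only toggles a static SQL fragment; the ASN value itself still travels through the bound :asn parameter, so the query stays parameterized. A hedged usage sketch with made-up values:

# Hypothetical calls; "IT" and 3269 are example values
rows = fetch_reactive_url_list("IT", 3269)   # adds AND probe_asn = :asn
rows_all = fetch_reactive_url_list("IT", 0)  # probe_asn 0 disables the ASN filter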
Example #4
def api_private_website_network_tests() -> Response:
    """TODO
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        description: The two letter country code
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = """SELECT
        COUNT() AS count,
        probe_asn
        FROM fastpath
        WHERE
            measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND probe_cc = :probe_cc
        GROUP BY probe_asn
        ORDER BY count DESC
        """
    results = query_click(sql.text(s), {"probe_cc": probe_cc})
    return cachedjson("0s", results=results)
Example #5
def get_recent_network_coverage_ch(probe_cc, test_groups):
    """Count ASNs with at least one measurements, grouped by day,
    for a given CC, and filtered by test groups
    Return [{"count": 58, "test_day": "2021-10-16" }, ... ]"""
    s = """SELECT
        toDate(measurement_start_time) AS test_day,
        COUNT(DISTINCT probe_asn) as count
    FROM fastpath
    WHERE test_day >= today() - interval 31 day
        AND test_day < today() - interval 1 day
        AND probe_cc = :probe_cc
        --mark--
    GROUP BY test_day ORDER BY test_day
    WITH FILL
        FROM today() - interval 31 day
        TO today() - interval 1 day
    """
    if test_groups:
        assert isinstance(test_groups, list)
        test_names = set()
        for tg in test_groups:
            tnames = TEST_GROUPS.get(tg, [])
            test_names.update(tnames)
        test_names = sorted(test_names)
        s = s.replace("--mark--", "AND test_name IN :test_names")
        d = {"probe_cc": probe_cc, "test_names": test_names}

    else:
        s = s.replace("--mark--", "")
        d = {"probe_cc": probe_cc}

    return query_click(sql.text(s), d)
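TEST_GROUPS is a module-level mapping from group name to test names; its real contents are not shown in these examples. A minimal sketch consistent with how it is used here (the membership below is illustrative, not authoritative):

# Illustrative only: group membership is a guess based on usage in these
# examples, not the module's real table
TEST_GROUPS = {
    "im": ["facebook_messenger", "signal", "telegram", "whatsapp"],
    "websites": ["web_connectivity"],
    "circumvention": ["psiphon", "riseupvpn", "tor", "torsf"],
}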
Example #6
def api_private_global_by_month() -> Response:
    """Provide global summary of measurements
    Sources: global_by_month db table
    ---
    responses:
      '200':
        description: JSON struct TODO
    """
    q = """SELECT
        COUNT(DISTINCT probe_asn) AS networks_by_month,
        COUNT(DISTINCT probe_cc) AS countries_by_month,
        COUNT() AS measurements_by_month,
        toStartOfMonth(measurement_start_time) AS month
        FROM fastpath
        WHERE measurement_start_time > toStartOfMonth(today() - interval 2 year)
        AND measurement_start_time < toStartOfMonth(today() + interval 1 month)
        GROUP BY month ORDER BY month
    """
    rows = query_click(sql.text(q), {})
    rows = list(rows)

    n = [{"date": r["month"], "value": r["networks_by_month"]} for r in rows]
    c = [{"date": r["month"], "value": r["countries_by_month"]} for r in rows]
    m = [{"date": r["month"], "value": r["measurements_by_month"]} for r in rows]
    expand_dates(n)
    expand_dates(c)
    expand_dates(m)
    return cachedjson(
        "1d", networks_by_month=n, countries_by_month=c, measurements_by_month=m
    )
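expand_dates is a small helper defined elsewhere; judging from its call sites it normalizes the date column of each row in place for JSON output. A guess at its shape (the real helper may instead emit full ISO timestamps or fill gaps):

# Hypothetical sketch of expand_dates; mutates rows in place like the call
# sites above suggest
from datetime import date, datetime

def expand_dates_sketch(rows: list) -> None:
    for r in rows:
        d = r["date"]
        if isinstance(d, (date, datetime)):
            r["date"] = d.strftime("%Y-%m-%d")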
Example #7
def get_recent_test_coverage_ch(probe_cc):
    """Returns
    [{"count": 4888, "test_day": "2021-10-16", "test_group": "websites"}, ... ]
    """
    q = "SELECT DISTINCT(test_group) FROM test_groups ORDER BY test_group"
    rows = query_click(sql.text(q), {})
    test_group_names = [r["test_group"] for r in rows]

    q = """SELECT
        toDate(measurement_start_time) as measurement_start_day,
        test_group,
        COUNT() as msmt_cnt
    FROM fastpath
    ANY LEFT JOIN test_groups USING (test_name)
    WHERE measurement_start_day >= today() - interval 32 day
    AND measurement_start_day < today() - interval 2 day
    AND probe_cc = :probe_cc
    GROUP BY measurement_start_day, test_group
    """
    rows = query_click(sql.text(q), dict(probe_cc=probe_cc))
    rows = tuple(rows)
    l30d = tuple(last_30days(32, 2))
    return pivot_test_coverage(rows, test_group_names, l30d)
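pivot_test_coverage turns the flat (day, test_group, count) rows into one entry per day and group, filling zeroes for missing days. A hedged sketch, assuming l30d is the iterable of days to cover and the output shape matches the docstring above:

# Hypothetical reconstruction from the call site; output shape guessed
# from the docstring of get_recent_test_coverage_ch
def pivot_test_coverage_sketch(rows, test_group_names, days):
    counts = {(r["measurement_start_day"], r["test_group"]): r["msmt_cnt"] for r in rows}
    out = []
    for day in days:
        for tg in test_group_names:
            out.append(
                {
                    "test_day": str(day),
                    "test_group": tg,
                    "count": counts.get((day, tg), 0),
                }
            )
    return out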
Example #8
def api_private_im_stats() -> Response:
    """Instant messaging statistics
    ---
    responses:
      '200':
        description: TODO
    """
    test_name = request.args.get("test_name")
    if not test_name or test_name not in TEST_GROUPS["im"]:
        raise BadRequest("invalid test_name")

    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()
    probe_asn = int(probe_asn.replace("AS", ""))

    s = """SELECT
        COUNT() as total_count,
        toDate(measurement_start_time) AS test_day
        FROM fastpath
        WHERE probe_cc = :probe_cc
        AND test_name = :test_name
        AND probe_asn = :probe_asn
        AND measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        GROUP BY test_day
        ORDER BY test_day
    """
    query_params = {
        "probe_cc": probe_cc,
        "probe_asn": probe_asn,
        "test_name": test_name,
    }
    q = query_click(sql.text(s), query_params)
    tmp = {r["test_day"]: r for r in q}
    results = []
    days = [date.today() + timedelta(days=(d - 31)) for d in range(32)]
    for d in days:
        if d in tmp:
            test_day = isomid(tmp[d]["test_day"])
            total_count = tmp[d]["total_count"]
        else:
            test_day = isomid(d)
            total_count = 0
        e = dict(anomaly_count=None, test_day=test_day, total_count=total_count)
        results.append(e)

    return cachedjson("1h", results=results)  # TODO caching
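isomid is presumably a tiny date formatter; from its usage on date objects above, something like:

# Hypothetical: formats a date as an ISO timestamp at midnight UTC,
# matching how test_day values appear in the API output
def isomid_sketch(d) -> str:
    return f"{d}T00:00:00+00:00"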
Example #9
def api_private_im_networks() -> Response:
    """Instant messaging networks statistics
    ---
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    s = """SELECT
    COUNT() AS total_count,
    '' AS name,
    toDate(MAX(measurement_start_time)) AS last_tested,
    probe_asn,
    test_name
    FROM fastpath
    WHERE measurement_start_time >= today() - interval 31 day
    AND measurement_start_time < today()
    AND probe_cc = :probe_cc
    AND test_name IN :test_names
    GROUP BY test_name, probe_asn
    ORDER BY test_name ASC, total_count DESC
    """
    results: Dict[str, Dict] = {}
    test_names = sorted(TEST_GROUPS["im"])
    q = query_click(sql.text(s), {"probe_cc": probe_cc, "test_names": test_names})
    for r in q:
        e = results.get(
            r["test_name"],
            {
                "anomaly_networks": [],
                "ok_networks": [],
                "last_tested": r["last_tested"],
            },
        )
        e["ok_networks"].append(
            {
                "asn": r["probe_asn"],
                "name": "",
                "total_count": r["total_count"],
                "last_tested": r["last_tested"],
            }
        )
        if e["last_tested"] < r["last_tested"]:
            e["last_tested"] = r["last_tested"]
        results[r["test_name"]] = e

    return cachedjson("1h", **results)  # TODO caching
Example #10
def api_private_vanilla_tor_stats() -> Response:
    """Tor statistics over ASN for a given CC
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        minLength: 2
        required: true
    responses:
      '200':
        description: TODO
    """
    probe_cc = validate_probe_cc_query_param()
    blocked = 0
    nets = []
    s = """SELECT
        countIf(msm_failure = 't') as failure_count,
        toDate(MAX(measurement_start_time)) AS last_tested,
        probe_asn,
        COUNT() as total_count,
        total_count - countIf(anomaly = 't') AS success_count
        FROM fastpath
        WHERE measurement_start_time > today() - INTERVAL 6 MONTH
        AND probe_cc = :probe_cc
        GROUP BY probe_asn
    """
    q = query_click(sql.text(s), {"probe_cc": probe_cc})
    extras = {
        "test_runtime_avg": None,
        "test_runtime_max": None,
        "test_runtime_min": None,
    }
    for n in q:
        n.update(extras)
        nets.append(n)
        if n["total_count"] > 5:
            if float(n["success_count"]) / float(n["total_count"]) < 0.6:
                blocked += 1

    if not nets:
        return cachedjson("0s", networks=[], notok_networks=0, last_tested=None)

    lt = max(n["last_tested"] for n in nets)
    return cachedjson("0s", networks=nets, notok_networks=blocked, last_tested=lt)
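A network is counted as blocked (notok) only when it has more than 5 measurements and its success ratio falls below 0.6. A worked example with made-up numbers:

# Worked example of the heuristic above (values made up):
n = {"total_count": 10, "success_count": 5}
ratio = n["success_count"] / n["total_count"]      # 0.5
is_blocked = n["total_count"] > 5 and ratio < 0.6  # True -> counted in notok_networks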
Example #11
def api_private_asn_by_month() -> Response:
    """Network count by month
    ---
    responses:
      '200':
        description: [{"date":"2018-08-31","value":4411}, ... ]
    """
    q = """SELECT
        COUNT(DISTINCT(probe_asn)) AS value,
        toStartOfMonth(measurement_start_time) AS date
    FROM fastpath
    WHERE measurement_start_time < toStartOfMonth(addMonths(now(), 1))
    AND measurement_start_time > toStartOfMonth(subtractMonths(now(), 24))
    GROUP BY date ORDER BY date
    """
    li = list(query_click(q, {}))
    expand_dates(li)
    return cachedjson("1d", li)
Example #12
def failover_fetch_citizenlab_data() -> Dict[str, List[CTZ]]:
    """Fetches the citizenlab table from the database.
    Used only once at startime for failover."""
    log = current_app.logger
    log.info("Started failover_fetch_citizenlab_data")
    sql = """SELECT category_code, url
    FROM citizenlab
    WHERE cc = 'ZZ'
    """
    out: Dict[str, List[CTZ]] = {}
    query = query_click(sql, {})
    for e in query:
        catcode = e["category_code"]
        c = CTZ(e["url"], catcode)
        out.setdefault(catcode, []).append(c)

    log.info("Fetch done: %d" % len(out))
    return out
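CTZ is imported from elsewhere; given the CTZ(e["url"], catcode) call above, it is presumably a small record type, e.g.:

# Hypothetical definition consistent with CTZ(url, category_code) above
from collections import namedtuple

CTZ = namedtuple("CTZ", ["url", "category_code"])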
Example #13
def api_private_countries_by_month() -> Response:
    """Countries count by month
    ---
    responses:
      '200':
        description: TODO
    """
    q = """SELECT
        COUNT(DISTINCT(probe_cc)) AS value,
        toStartOfMonth(measurement_start_time) AS date
    FROM fastpath
    WHERE measurement_start_time < toStartOfMonth(addMonths(now(), 1))
    AND measurement_start_time > toStartOfMonth(subtractMonths(now(), 24))
    GROUP BY date ORDER BY date
    """
    li = list(query_click(q, {}))
    expand_dates(li)
    return cachedjson("1d", li)
Example #14
def list_url_priorities() -> Response:
    """List URL priority rules
    ---
    responses:
      200:
        type: string
    """
    global log
    log = current_app.logger
    log.debug("listing URL prio rules")
    query = """SELECT category_code, cc, domain, url, priority
    FROM url_priorities FINAL
    ORDER BY category_code, cc, domain, url, priority
    """
    # The url_priorities table is a CollapsingMergeTree: FINAL collapses row
    # versions at query time so only the current rules are returned
    q = query_click(sql.text(query), {})
    rows = list(q)
    try:
        return make_response(jsonify(rules=rows))
    except BaseOONIException as e:
        return jerror(e)
Example #15
def api_private_circumvention_stats_by_country() -> Response:
    """Aggregated statistics on protocols used for circumvention,
    grouped by country.
    ---
    responses:
      200:
        description: List of dicts with keys probe_cc and cnt
    """
    q = """SELECT probe_cc, COUNT(*) as cnt
        FROM fastpath
        WHERE measurement_start_time > today() - interval 6 month
        AND measurement_start_time < today() - interval 1 day
        AND test_name IN ['torsf', 'tor', 'stunreachability', 'psiphon','riseupvpn']
        GROUP BY probe_cc ORDER BY probe_cc
    """
    try:
        result = query_click(sql.text(q), {})
        return cachedjson("1d", v=0, results=result)

    except Exception as e:
        return cachedjson("0d", v=0, error=str(e))
Example #16
def api_private_countries() -> Response:
    """Summary of countries
    ---
    responses:
      '200':
        description: {"countries": [{"alpha_2": x, "count": y, "name":  z}, ... ]}
    """
    q = """
    SELECT probe_cc, COUNT() AS measurement_count
    FROM fastpath
    GROUP BY probe_cc ORDER BY probe_cc
    """
    c = []
    rows = query_click(q, {})
    for r in rows:
        try:
            name = lookup_country(r["probe_cc"])
            c.append(
                dict(alpha_2=r["probe_cc"], name=name, count=r["measurement_count"])
            )
        except KeyError:
            pass

    return cachedjson("1d", countries=c)
Example #17
def fetch_prioritization_rules(cc: str) -> tuple:
    """Fetch URL prioritization rules for the given CC plus global '*' rules"""
    sql = """SELECT category_code, cc, domain, url, priority
    FROM url_priorities WHERE cc = :cc OR cc = '*'
    """
    q = query_click(sa.text(sql), dict(cc=cc))
    return tuple(q)
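A hedged usage sketch, assuming query_click yields dict-like rows ("IT" is a made-up example value):

# Hypothetical call for illustration
rules = fetch_prioritization_rules("IT")
for rule in rules:
    print(rule["category_code"], rule["cc"], rule["domain"], rule["priority"])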
Example #18
def api_private_website_test_urls() -> Response:
    """TODO
    ---
    responses:
      '200':
        description: TODO
    """
    limit = int(request.args.get("limit", 10))
    if limit <= 0:
        limit = 10
    offset = int(request.args.get("offset", 0))

    probe_cc = validate_probe_cc_query_param()
    probe_asn = validate_probe_asn_query_param()
    probe_asn = int(probe_asn.replace("AS", ""))

    # Count how many distinct inputs we have in this CC / ASN / period
    s = """
        SELECT COUNT(DISTINCT(input)) as input_count
        FROM fastpath
        WHERE measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND test_name = 'web_connectivity'
        AND probe_cc = :probe_cc
        AND probe_asn = :probe_asn
    """
    q = query_click_one_row(
        sql.text(s), dict(probe_cc=probe_cc, probe_asn=probe_asn)
    )
    total_count = q["input_count"] if q else 0

    # Group msmts by CC / ASN / period with LIMIT and OFFSET
    s = """SELECT input,
        countIf(anomaly = 't') as anomaly_count,
        countIf(confirmed = 't') as confirmed_count,
        countIf(msm_failure = 't') as failure_count,
        COUNT() AS total_count
        FROM fastpath
        WHERE measurement_start_time >= today() - interval '31 day'
        AND measurement_start_time < today()
        AND test_name = 'web_connectivity'
        AND probe_cc = :probe_cc
        AND probe_asn = :probe_asn
        GROUP BY input
        ORDER BY confirmed_count DESC, total_count DESC,
                anomaly_count DESC, input ASC
        LIMIT :limit
        OFFSET :offset
        """
    d = {
        "probe_cc": probe_cc,
        "probe_asn": probe_asn,
        "limit": limit,
        "offset": offset,
    }
    results = query_click(sql.text(s), d)
    current_page = math.ceil(offset / limit) + 1
    metadata = {
        "offset": offset,
        "limit": limit,
        "current_page": current_page,
        "total_count": total_count,
        "next_url": None,
    }

    # Create next_url
    if len(results) >= limit:
        args = dict(
            limit=limit,
            offset=offset + limit,
            probe_asn=probe_asn,
            probe_cc=probe_cc,
        )
        # TODO: remove BASE_URL?
        next_url = urljoin(
            current_app.config["BASE_URL"],
            "/api/_/website_urls?%s" % urlencode(args),
        )
        metadata["next_url"] = next_url

    return cachedjson("1h", metadata=metadata, results=results)  # TODO caching
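The next_url in metadata lets a client walk all pages until it becomes None. A hedged client-side sketch (the requests package and host name are assumptions; the query values are made up):

# Hypothetical pagination client; api.example.org is a placeholder host
import requests

url = "https://api.example.org/api/_/website_urls?probe_cc=IT&probe_asn=3269&limit=10"
while url:
    j = requests.get(url, timeout=30).json()
    for row in j["results"]:
        print(row["input"], row["total_count"])
    url = j["metadata"]["next_url"]  # None on the last page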
Example #19
def _list_measurements_click(
    since,
    until,
    report_id: ostr,
    probe_cc: ostr,
    probe_asn: ostr,
    test_name: ostr,
    anomaly,
    confirmed,
    failure,
    input_: ostr,
    domain: ostr,
    category_code,
    order: ostr,
    order_by,
    limit,
    offset,
) -> Any:
    INULL = ""  # Special value for input = NULL to merge rows with FULL OUTER JOIN

    ## Create fastpath columns for query
    # TODO: cast scores; coalesce input as ""
    fpwhere = []
    query_params = {}

    # Populate WHERE clauses and query_params dict

    if since is not None:
        query_params["since"] = since
        fpwhere.append(sql.text("measurement_start_time > :since"))

    if until is not None:
        query_params["until"] = until
        fpwhere.append(sql.text("measurement_start_time <= :until"))

    if report_id:
        query_params["report_id"] = report_id
        fpwhere.append(sql.text("report_id = :report_id"))

    if probe_cc:
        if probe_cc == "ZZ":
            log.info("Refusing list_measurements with probe_cc set to ZZ")
            abort(403)
        query_params["probe_cc"] = probe_cc
        fpwhere.append(sql.text("probe_cc = :probe_cc"))
    else:
        fpwhere.append(sql.text("probe_cc != 'ZZ'"))

    if probe_asn is not None:
        if probe_asn == 0:
            log.info("Refusing list_measurements with probe_asn set to 0")
            abort(403)
        query_params["probe_asn"] = probe_asn
        fpwhere.append(sql.text("probe_asn = :probe_asn"))
    else:
        # https://ooni.org/post/2020-ooni-probe-asn-incident-report/
        # https://github.com/ooni/explorer/issues/495
        fpwhere.append(sql.text("probe_asn != 0"))

    if test_name is not None:
        query_params["test_name"] = test_name
        fpwhere.append(sql.text("test_name = :test_name"))

    # Filter on anomaly, confirmed and failure:
    # The database stores anomaly and confirmed as boolean + NULL and stores
    # failures in different columns. This leads to many possible combinations
    # but only a subset is used.
    # On anomaly and confirmed: any value != TRUE is treated as FALSE
    # See test_list_measurements_filter_flags_fastpath

    if anomaly is True:
        fpwhere.append(sql.text("fastpath.anomaly = 't'"))

    elif anomaly is False:
        fpwhere.append(sql.text("fastpath.anomaly = 'f'"))

    if confirmed is True:
        fpwhere.append(sql.text("fastpath.confirmed = 't'"))

    elif confirmed is False:
        fpwhere.append(sql.text("fastpath.confirmed = 'f'"))

    if failure is True:
        # residual_no is never NULL, msm_failure is always NULL
        fpwhere.append(sql.text("fastpath.msm_failure = 't'"))

    elif failure is False:
        # on success measurement.exc is NULL
        fpwhere.append(sql.text("fastpath.msm_failure = 'f'"))

    fpq_table = sql.table("fastpath")

    if input_:
        # input_ overrides domain and category_code
        query_params["input"] = input_
        fpwhere.append(sql.text("input = :input"))

    elif domain or category_code:
        # both domain and category_code can be set at the same time
        if domain:
            query_params["domain"] = domain
            fpwhere.append(sql.text("domain = :domain"))

        if category_code:
            query_params["category_code"] = category_code
            fpq_table = fpq_table.join(
                sql.table("citizenlab"),
                sql.text("citizenlab.url = fastpath.input"),
            )
            fpwhere.append(
                sql.text("citizenlab.category_code = :category_code"))

    fp_query = select("*").where(and_(*fpwhere)).select_from(fpq_table)
    # .limit(offset + limit)

    # SELECT * FROM fastpath  WHERE measurement_start_time <= '2019-01-01T00:00:00'::timestamp AND probe_cc = 'YT' ORDER BY test_start_time desc   LIMIT 100 OFFSET 0;
    # is using BRIN and running slowly

    if order_by is None:
        order_by = "measurement_start_time"

    fp_query = fp_query.order_by(text("{} {}".format(order_by, order)))

    # Assemble the "external" query. Run a final order by followed by limit and
    # offset
    query = fp_query.offset(offset).limit(limit)
    query_params["param_1"] = limit
    query_params["param_2"] = offset

    # Run the query, generate the results list
    iter_start_time = time.time()

    try:
        rows = query_click(query, query_params)
        tmpresults = []
        for row in rows:
            if row["input"] in (None, ""):
                url = genurl("/api/v1/raw_measurement",
                             report_id=row["report_id"])
            else:
                url = genurl(
                    "/api/v1/raw_measurement",
                    report_id=row["report_id"],
                    input=row["input"],
                )
            tmpresults.append({
                "measurement_url":
                url,
                "report_id":
                row["report_id"],
                "probe_cc":
                row["probe_cc"],
                "probe_asn":
                "AS{}".format(row["probe_asn"]),
                "test_name":
                row["test_name"],
                "measurement_start_time":
                row["measurement_start_time"],
                "input":
                row["input"],
                "anomaly":
                row["anomaly"] == "t",
                "confirmed":
                row["confirmed"] == "t",
                "failure":
                row["msm_failure"] == "t",
                "scores":
                json.loads(row["scores"]),
            })
    except OperationalError as exc:
        log.error(exc)
        if isinstance(exc.orig, QueryCanceledError):
            # Timeout due to a slow query. Generate metric and do not feed it
            # to Sentry.
            abort(504)

        raise exc

    # For each report_id / input tuple, we want at most one entry.
    results = _merge_results(tmpresults)

    # Replace the special value INULL for "input" with None
    for i, r in enumerate(results):
        if r["input"] == INULL:
            results[i]["input"] = None

    pages = -1
    count = -1
    current_page = math.ceil(offset / limit) + 1

    # We got less results than what we expected, we know the count and that
    # we are done
    if len(tmpresults) < limit:
        count = offset + len(results)
        pages = math.ceil(count / limit)
        next_url = None
    else:
        # XXX this is too intensive. find a workaround
        # count_start_time = time.time()
        # count = q.count()
        # pages = math.ceil(count / limit)
        # current_page = math.ceil(offset / limit) + 1
        # query_time += time.time() - count_start_time
        next_args = request.args.to_dict()
        next_args["offset"] = str(offset + limit)
        next_args["limit"] = str(limit)
        next_url = genurl("/api/v1/measurements", **next_args)

    query_time = time.time() - iter_start_time
    metadata = {
        "offset": offset,
        "limit": limit,
        "count": count,
        "pages": pages,
        "current_page": current_page,
        "next_url": next_url,
        "query_time": query_time,
    }

    response = jsonify({"metadata": metadata, "results": results[:limit]})
    response.cache_control.max_age = 1
    return response
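_merge_results deduplicates on the (report_id, input) pair, keeping at most one entry each. A plausible sketch, assuming a first-wins policy (the real helper may merge flags across duplicates instead):

# Hypothetical reconstruction: keep the first entry seen per pair
def _merge_results_sketch(tmpresults: list) -> list:
    merged = {}
    for r in tmpresults:
        key = (r["report_id"], r["input"])
        if key not in merged:
            merged[key] = r
    return list(merged.values())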
Example #20
def get_torsf_stats() -> Response:
    """Tor Pluggable Transports statistics
    Counts of anomalous / confirmed / failed measurements grouped by day
    and probe_cc, optionally filtered by probe_cc
    Returns a format similar to get_aggregated
    ---
    parameters:
      - name: probe_cc
        in: query
        type: string
        description: The two letter country code
        minLength: 2
      - name: since
        in: query
        type: string
        description: >-
          The start date of when measurements were run (ex.
          "2016-10-20T10:30:00")
      - name: until
        in: query
        type: string
        description: >-
          The end date of when measurements were run (ex.
          "2016-10-20T10:30:00")
    responses:
      '200':
        description: Returns aggregated counters
    """
    log = current_app.logger
    param = request.args.get
    probe_cc = param("probe_cc")
    since = param("since")
    until = param("until")
    cacheable = False

    cols = [
        sql.text("toDate(measurement_start_time) AS measurement_start_day"),
        column("probe_cc"),
        sql.text("countIf(anomaly = 't') AS anomaly_count"),
        sql.text("countIf(confirmed = 't') AS confirmed_count"),
        sql.text("countIf(msm_failure = 't') AS failure_count"),
        # measurement_count is needed below to compute anomaly_rate
        sql.text("COUNT() AS measurement_count"),
    ]
    table = sql.table("fastpath")
    where = [sql.text("test_name = 'torsf'")]
    query_params = {}

    if probe_cc:
        where.append(sql.text("probe_cc = :probe_cc"))
        query_params["probe_cc"] = probe_cc

    if since:
        since = parse_date(since)
        where.append(sql.text("measurement_start_time > :since"))
        query_params["since"] = since

    if until:
        until = parse_date(until)
        where.append(sql.text("measurement_start_time <= :until"))
        query_params["until"] = until
        cacheable = until < datetime.now() - timedelta(hours=72)

    # Assemble query
    where_expr = and_(*where)
    query = select(cols).where(where_expr).select_from(table)

    query = query.group_by(column("measurement_start_day"), column("probe_cc"))
    query = query.order_by(column("measurement_start_day"), column("probe_cc"))

    try:
        q = query_click(query, query_params)
        result = []
        for row in q:
            row = dict(row)
            row["anomaly_rate"] = row["anomaly_count"] / row[
                "measurement_count"]
            result.append(row)
        response = jsonify({"v": 0, "result": result})
        if cacheable:
            response.cache_control.max_age = 3600 * 24
        return response

    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})
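parse_date is imported elsewhere; the free-form since/until handling suggests a flexible ISO parser such as dateutil (an assumption, not confirmed by this snippet):

# Assumed import; this module's actual import is not shown here
from dateutil.parser import parse as parse_date

parse_date("2016-10-20T10:30:00")  # -> datetime.datetime(2016, 10, 20, 10, 30)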
Example #21
def _clickhouse_aggregation(
    resp_format: str,
    download: bool,
    since,
    until,
    inp: ostr,
    domain: ostr,
    category_code: ostr,
    probe_cc: ostr,
    probe_asn: Optional[int],
    test_name: ostr,
    axis_x: ostr,
    axis_y: ostr,
):
    log = current_app.logger
    dimension_cnt = int(bool(axis_x)) + int(bool(axis_y))
    cacheable = until and until < datetime.now() - timedelta(hours=72)
    cacheable = False  # FIXME

    # Assemble query
    colnames = [
        "anomaly_count",
        "confirmed_count",
        "failure_count",
        "ok_count",
        "measurement_count",
    ]
    cols = [
        sql.text(
            "countIf(anomaly = 't' AND confirmed = 'f' AND msm_failure = 'f') AS anomaly_count"
        ),
        sql.text(
            "countIf(confirmed = 't' AND msm_failure = 'f') AS confirmed_count"
        ),
        sql.text("countIf(msm_failure = 't') AS failure_count"),
        sql.text(
            "countIf(anomaly = 'f' AND confirmed = 'f' AND msm_failure = 'f') AS ok_count"
        ),
        sql.text("COUNT(*) AS measurement_count"),
    ]
    table = sql.table("fastpath")
    where = []
    query_params = {}

    if domain:
        where.append(sql.text("domain = :domain"))
        query_params["domain"] = domain

    if inp:
        where.append(sql.text("input = :input"))
        query_params["input"] = inp

    if category_code:
        where.append(sql.text("citizenlab.category_code = :category_code"))
        query_params["category_code"] = category_code
        if probe_cc:
            where.append(
                sql.text("(citizenlab.cc = :lcc OR citizenlab.cc = 'ZZ')"))
            query_params["lcc"] = probe_cc.lower()
        else:
            where.append(sql.text("citizenlab.cc = 'ZZ'"))

    if probe_cc:
        where.append(sql.text("probe_cc = :probe_cc"))
        query_params["probe_cc"] = probe_cc

    if probe_asn is not None:
        where.append(sql.text("probe_asn = :probe_asn"))
        query_params["probe_asn"] = str(probe_asn)

    if since:
        where.append(sql.text("measurement_start_time > :since"))
        query_params["since"] = since

    if until:
        where.append(sql.text("measurement_start_time <= :until"))
        query_params["until"] = until

    if test_name:
        where.append(sql.text("test_name = :test_name"))
        query_params["test_name"] = test_name

    if axis_x == axis_y and axis_x is not None:
        raise ValueError("Axis X and Y cannot be the same")

    def group_by_date(since, until, cols, colnames, group_by):
        if since and until:
            delta = abs((until - since).days)
        else:
            delta = 0

        # TODO: add a granularity parameter and a check around query weight /
        # / response size. Also add support in CSV format.
        granularity = ("day", "toDate")
        tcol, fun = granularity
        tcol = "measurement_start_day"  # TODO: support dynamic axis names
        cols.append(sql.text(f"{fun}(measurement_start_time) AS {tcol}"))
        colnames.append(tcol)
        group_by.append(column(tcol))

    def add_axis(axis, cols, colnames, group_by):
        if axis == "blocking_type":
            t = "JSONExtractString(scores, 'analysis', 'blocking_type') AS blocking_type"
            cols.append(sql.text(t))
        else:
            validate_axis_name(axis)
            cols.append(sql.text(axis))
        colnames.append(axis)
        group_by.append(column(axis))

    group_by: List = []
    if axis_x == "measurement_start_day":
        group_by_date(since, until, cols, colnames, group_by)
    elif axis_x:
        add_axis(axis_x, cols, colnames, group_by)

    if axis_y == "measurement_start_day":
        group_by_date(since, until, cols, colnames, group_by)
    elif axis_y:
        add_axis(axis_y, cols, colnames, group_by)

    # Assemble query
    if category_code or axis_x == "category_code" or axis_y == "category_code":
        # Join in the citizenlab table if we need to filter on category_code
        # or perform group-by on it
        table = table.join(
            sql.table("citizenlab"),
            sql.text("citizenlab.url = fastpath.input"),
        )

    where_expr = and_(*where)
    query = select(cols).where(where_expr).select_from(table)

    # Add group-by
    for g in group_by:
        query = query.group_by(g).order_by(g)

    try:
        if dimension_cnt > 0:
            r: Any = list(query_click(query, query_params))
        else:
            r = query_click_one_row(query, query_params)

        pq = current_app.click.last_query
        msg = f"Stats: {pq.progress.rows} {pq.progress.bytes} {pq.progress.total_rows} {pq.elapsed}"
        log.info(msg)

        if resp_format == "CSV":
            csv_data = _convert_to_csv(r)
            response = make_response(csv_data)
            response.headers["Content-Type"] = "text/csv"
            if download:
                set_dload(response, "ooni-aggregate-data.csv")

        else:
            resp_d = {
                "v": 0,
                "dimension_count": dimension_cnt,
                "result": r,
                "db_stats": {
                    "row_count": pq.progress.rows,
                    "bytes": pq.progress.bytes,
                    "total_row_count": pq.progress.total_rows,
                    "elapsed_seconds": pq.elapsed,
                },
            }
            response = jsonify(resp_d)
            if download:
                set_dload(response, "ooni-aggregate-data.json")

        if cacheable:
            response.cache_control.max_age = 3600 * 24
        return response

    except Exception as e:
        return jsonify({"v": 0, "error": str(e)})
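_convert_to_csv is defined elsewhere; a minimal sketch with the standard csv module, assuming r is either a list of dicts (grouped queries) or a single dict row:

# Hypothetical reconstruction from the call site above
import csv
from io import StringIO

def _convert_to_csv_sketch(r) -> str:
    rows = r if isinstance(r, list) else [r]
    if not rows:
        return ""
    buf = StringIO()
    writer = csv.DictWriter(buf, fieldnames=list(rows[0].keys()))
    writer.writeheader()
    writer.writerows(rows)
    return buf.getvalue()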