Example #1
def test_path_from_row_pks(row, pks, expected_path):
    actual_path = utils.path_from_row_pks(row, pks, False)
    assert expected_path == actual_path
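
This test takes row, pks and expected_path as arguments, which suggests it is driven by @pytest.mark.parametrize. A minimal sketch of how such a parametrization might look; the parameter values below are hypothetical, not taken from the Datasette test suite:

import pytest

from datasette import utils

@pytest.mark.parametrize(
    "row,pks,expected_path",
    [
        # Hypothetical cases: a single and a compound primary key
        ({"id": "5"}, ["id"], "5"),
        ({"a": "1", "b": "2"}, ["a", "b"], "1,2"),
    ],
)
def test_path_from_row_pks(row, pks, expected_path):
    actual_path = utils.path_from_row_pks(row, pks, False)
    assert expected_path == actual_path
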
Example #2
def json_renderer(args, data, view_name):
    """ Render a response as JSON """
    status_code = 200
    # Handle the _json= parameter which may modify data["rows"]
    json_cols = []
    if "_json" in args:
        json_cols = args.getlist("_json")
    if json_cols and "rows" in data and "columns" in data:
        data["rows"] = convert_specific_columns_to_json(
            data["rows"], data["columns"], json_cols)

    # unless _json_infinity=1 requested, replace infinity with None
    if "rows" in data and not value_as_boolean(args.get("_json_infinity",
                                                        "0")):
        data["rows"] = [remove_infinites(row) for row in data["rows"]]

    # Deal with the _shape option
    shape = args.get("_shape", "arrays")

    next_url = data.get("next_url")

    if shape == "arrayfirst":
        data = [row[0] for row in data["rows"]]
    elif shape in ("objects", "object", "array"):
        columns = data.get("columns")
        rows = data.get("rows")
        if rows and columns:
            data["rows"] = [dict(zip(columns, row)) for row in rows]
        if shape == "object":
            error = None
            if "primary_keys" not in data:
                error = "_shape=object is only available on tables"
            else:
                pks = data["primary_keys"]
                if not pks:
                    error = (
                        "_shape=object not available for tables with no primary keys"
                    )
                else:
                    object_rows = {}
                    for row in data["rows"]:
                        pk_string = path_from_row_pks(row, pks, not pks)
                        object_rows[pk_string] = row
                    data = object_rows
            if error:
                data = {"ok": False, "error": error}
        elif shape == "array":
            data = data["rows"]

    elif shape == "arrays":
        pass
    else:
        status_code = 400
        data = {
            "ok": False,
            "error": f"Invalid _shape: {shape}",
            "status": 400,
            "title": None,
        }
    # Handle _nl option for _shape=array
    nl = args.get("_nl", "")
    if nl and shape == "array":
        body = "\n".join(
            json.dumps(item, cls=CustomJSONEncoder) for item in data)
        content_type = "text/plain"
    else:
        body = json.dumps(data, cls=CustomJSONEncoder)
        content_type = "application/json; charset=utf-8"
    headers = {}
    if next_url:
        headers["link"] = f'<{next_url}>; rel="next"'
    return Response(body,
                    status=status_code,
                    headers=headers,
                    content_type=content_type)
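
The _shape handling above reduces to a handful of list and dict transformations. A standalone sketch of what each shape produces, using made-up sample data:

columns = ["id", "name"]
rows = [[1, "Cleo"], [2, "Pancakes"]]

# _shape=arrays (the default): rows stay as lists of values
arrays = rows

# _shape=objects: each row becomes a dict keyed by column name
objects = [dict(zip(columns, row)) for row in rows]
# [{"id": 1, "name": "Cleo"}, {"id": 2, "name": "Pancakes"}]

# _shape=array: the bare list of row objects, with no outer wrapper
array = objects

# _shape=arrayfirst: just the first value of each row
arrayfirst = [row[0] for row in rows]  # [1, 2]

# _shape=object: a dict keyed by primary key path ("id" here),
# mirroring what path_from_row_pks produces for a single pk
obj = {str(row["id"]): row for row in objects}
# {"1": {"id": 1, "name": "Cleo"}, "2": {"id": 2, "name": "Pancakes"}}
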
Example #3
    async def data(
        self,
        request,
        database,
        hash,
        table,
        default_labels=False,
        _next=None,
        _size=None,
    ):
        canned_query = await self.ds.get_canned_query(database, table, request.actor)
        if canned_query:
            return await QueryView(self.ds).data(
                request,
                database,
                hash,
                canned_query["sql"],
                metadata=canned_query,
                editable=False,
                canned_query=table,
                named_parameters=canned_query.get("params"),
                write=bool(canned_query.get("write")),
            )

        db = self.ds.databases[database]
        is_view = bool(await db.get_view_definition(table))
        table_exists = bool(await db.table_exists(table))
        if not is_view and not table_exists:
            raise NotFound("Table not found: {}".format(table))

        await self.check_permission(request, "view-instance")
        await self.check_permission(request, "view-database", database)
        await self.check_permission(request, "view-table", (database, table))

        private = not await self.ds.permission_allowed(
            None, "view-table", (database, table), default=True
        )

        pks = await db.primary_keys(table)
        table_columns = await db.table_columns(table)

        select_columns = ", ".join(escape_sqlite(t) for t in table_columns)

        use_rowid = not pks and not is_view
        if use_rowid:
            select = "rowid, {}".format(select_columns)
            order_by = "rowid"
            order_by_pks = "rowid"
        else:
            select = select_columns
            order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
            order_by = order_by_pks

        if is_view:
            order_by = ""

        # Ensure we don't drop anything with an empty value e.g. ?name__exact=
        args = MultiParams(
            urllib.parse.parse_qs(request.query_string, keep_blank_values=True)
        )

        # Special args start with _ and do not contain a __
        # That's so if there is a column that starts with _
        # it can still be queried using ?_col__exact=blah
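        # e.g. ?_size=10&name__exact=blah (hypothetical) would yield
        # special_args == {"_size": "10"} and other_args == [("name__exact", "blah")]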
        special_args = {}
        other_args = []
        for key in args:
            if key.startswith("_") and "__" not in key:
                special_args[key] = args[key]
            else:
                for v in args.getlist(key):
                    other_args.append((key, v))

        # Handle ?_filter_column and redirect, if present
        redirect_params = filters_should_redirect(special_args)
        if redirect_params:
            return self.redirect(
                request,
                path_with_added_args(request, redirect_params),
                forward_querystring=False,
            )

        # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
        if "_sort_by_desc" in special_args:
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {
                        "_sort_desc": special_args.get("_sort"),
                        "_sort_by_desc": None,
                        "_sort": None,
                    },
                ),
                forward_querystring=False,
            )

        table_metadata = self.ds.table_metadata(database, table)
        units = table_metadata.get("units", {})
        filters = Filters(sorted(other_args), units, ureg)
        where_clauses, params = filters.build_where_clauses(table)

        extra_wheres_for_ui = []
        # Add _where= from querystring
        if "_where" in request.args:
            if not await self.ds.permission_allowed(
                request.actor, "execute-sql", resource=database, default=True,
            ):
                raise DatasetteError("_where= is not allowed", status=403)
            else:
                where_clauses.extend(request.args.getlist("_where"))
                extra_wheres_for_ui = [
                    {
                        "text": text,
                        "remove_url": path_with_removed_args(request, {"_where": text}),
                    }
                    for text in request.args.getlist("_where")
                ]

        # Support for ?_through={table, column, value}
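        # e.g. ?_through={"table": "tags", "column": "tag_id", "value": "5"}
        # (hypothetical) restricts rows to those referenced by rows in the
        # tags table that have tag_id = 5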
        extra_human_descriptions = []
        if "_through" in request.args:
            for through in request.args.getlist("_through"):
                through_data = json.loads(through)
                through_table = through_data["table"]
                other_column = through_data["column"]
                value = through_data["value"]
                outgoing_foreign_keys = await db.foreign_keys_for_table(through_table)
                try:
                    fk_to_us = [
                        fk for fk in outgoing_foreign_keys if fk["other_table"] == table
                    ][0]
                except IndexError:
                    raise DatasetteError(
                        "Invalid _through - could not find corresponding foreign key"
                    )
                param = "p{}".format(len(params))
                where_clauses.append(
                    "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format(
                        through_table=escape_sqlite(through_table),
                        our_pk=escape_sqlite(fk_to_us["other_column"]),
                        our_column=escape_sqlite(fk_to_us["column"]),
                        other_column=escape_sqlite(other_column),
                        param=param,
                    )
                )
                params[param] = value
                extra_human_descriptions.append(
                    '{}.{} = "{}"'.format(through_table, other_column, value)
                )

        # _search support:
        fts_table = special_args.get("_fts_table")
        fts_table = fts_table or table_metadata.get("fts_table")
        fts_table = fts_table or await db.fts_table(table)
        fts_pk = special_args.get("_fts_pk", table_metadata.get("fts_pk", "rowid"))
        search_args = dict(
            pair for pair in special_args.items() if pair[0].startswith("_search")
        )
        search = ""
        search_mode_raw = special_args.get("_searchmode") == "raw"
        if fts_table and search_args:
            if "_search" in search_args:
                # Simple ?_search=xxx
                search = search_args["_search"]
                where_clauses.append(
                    "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})".format(
                        fts_table=escape_sqlite(fts_table),
                        fts_pk=escape_sqlite(fts_pk),
                        match_clause=":search"
                        if search_mode_raw
                        else "escape_fts(:search)",
                    )
                )
                extra_human_descriptions.append('search matches "{}"'.format(search))
                params["search"] = search
            else:
                # More complex: search against specific columns
                for i, (key, search_text) in enumerate(search_args.items()):
                    search_col = key.split("_search_", 1)[1]
                    if search_col not in await db.table_columns(fts_table):
                        raise DatasetteError("Cannot search by that column", status=400)

                    where_clauses.append(
                        "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format(
                            fts_table=escape_sqlite(fts_table),
                            search_col=escape_sqlite(search_col),
                            match_clause=":search_{}".format(i)
                            if search_mode_raw
                            else "escape_fts(:search_{})".format(i),
                        )
                    )
                    extra_human_descriptions.append(
                        'search column "{}" matches "{}"'.format(
                            search_col, search_text
                        )
                    )
                    params["search_{}".format(i)] = search_text

        sortable_columns = await self.sortable_columns_for_table(
            database, table, use_rowid
        )

        # Allow for custom sort order
        sort = special_args.get("_sort")
        sort_desc = special_args.get("_sort_desc")

        if not sort and not sort_desc:
            sort = table_metadata.get("sort")
            sort_desc = table_metadata.get("sort_desc")

        if sort and sort_desc:
            raise DatasetteError("Cannot use _sort and _sort_desc at the same time")

        if sort:
            if sort not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort))

            order_by = escape_sqlite(sort)

        if sort_desc:
            if sort_desc not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort_desc))

            order_by = "{} desc".format(escape_sqlite(sort_desc))

        from_sql = "from {table_name} {where}".format(
            table_name=escape_sqlite(table),
            where=("where {} ".format(" and ".join(where_clauses)))
            if where_clauses
            else "",
        )
        # Copy of params so we can mutate them later:
        from_sql_params = dict(**params)

        count_sql = "select count(*) {}".format(from_sql)

        _next = _next or special_args.get("_next")
        offset = ""
        if _next:
            if is_view:
                # _next is an offset
                offset = " offset {}".format(int(_next))
            else:
                components = urlsafe_components(_next)
                # If a sort order is applied, the first of these is the sort value
                if sort or sort_desc:
                    sort_value = components[0]
                    # Special case for if non-urlencoded first token was $null
                    if _next.split(",")[0] == "$null":
                        sort_value = None
                    components = components[1:]

                # Figure out the SQL for next-based-on-primary-key first
                next_by_pk_clauses = []
                if use_rowid:
                    next_by_pk_clauses.append("rowid > :p{}".format(len(params)))
                    params["p{}".format(len(params))] = components[0]
                else:
                    # Apply the tie-breaker based on primary keys
                    if len(components) == len(pks):
                        param_len = len(params)
                        next_by_pk_clauses.append(
                            compound_keys_after_sql(pks, param_len)
                        )
                        for i, pk_value in enumerate(components):
                            params["p{}".format(param_len + i)] = pk_value

                # Now add the sort SQL, which may incorporate next_by_pk_clauses
                if sort or sort_desc:
                    if sort_value is None:
                        if sort_desc:
                            # Just items where column is null ordered by pk
                            where_clauses.append(
                                "({column} is null and {next_clauses})".format(
                                    column=escape_sqlite(sort_desc),
                                    next_clauses=" and ".join(next_by_pk_clauses),
                                )
                            )
                        else:
                            where_clauses.append(
                                "({column} is not null or ({column} is null and {next_clauses}))".format(
                                    column=escape_sqlite(sort),
                                    next_clauses=" and ".join(next_by_pk_clauses),
                                )
                            )
                    else:
                        where_clauses.append(
                            "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format(
                                column=escape_sqlite(sort or sort_desc),
                                op=">" if sort else "<",
                                p=len(params),
                                extra_desc_only=""
                                if sort
                                else " or {column2} is null".format(
                                    column2=escape_sqlite(sort or sort_desc)
                                ),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            )
                        )
                        params["p{}".format(len(params))] = sort_value
                    order_by = "{}, {}".format(order_by, order_by_pks)
                else:
                    where_clauses.extend(next_by_pk_clauses)

        where_clause = ""
        if where_clauses:
            where_clause = "where {} ".format(" and ".join(where_clauses))

        if order_by:
            order_by = "order by {} ".format(order_by)

        extra_args = {}
        # Handle ?_size=500
        page_size = _size or request.args.get("_size") or table_metadata.get("size")
        if page_size:
            if page_size == "max":
                page_size = self.ds.max_returned_rows
            try:
                page_size = int(page_size)
                if page_size < 0:
                    raise ValueError

            except ValueError:
                raise DatasetteError("_size must be a positive integer", status=400)

            if page_size > self.ds.max_returned_rows:
                raise DatasetteError(
                    "_size must be <= {}".format(self.ds.max_returned_rows), status=400
                )

            extra_args["page_size"] = page_size
        else:
            page_size = self.ds.page_size

        sql_no_limit = "select {select} from {table_name} {where}{order_by}".format(
            select=select,
            table_name=escape_sqlite(table),
            where=where_clause,
            order_by=order_by,
        )
        sql = "{sql_no_limit} limit {limit}{offset}".format(
            sql_no_limit=sql_no_limit.rstrip(), limit=page_size + 1, offset=offset
        )

        if request.args.get("_timelimit"):
            extra_args["custom_time_limit"] = int(request.args.get("_timelimit"))

        results = await db.execute(sql, params, truncate=True, **extra_args)

        # Number of filtered rows in whole set:
        filtered_table_rows_count = None
        if (
            not db.is_mutable
            and self.ds.inspect_data
            and count_sql == "select count(*) from {} ".format(table)
        ):
            try:
                filtered_table_rows_count = self.ds.inspect_data[database]["tables"][
                    table
                ]["count"]
            except KeyError:
                pass

        if count_sql and filtered_table_rows_count is None:
            try:
                count_rows = list(await db.execute(count_sql, from_sql_params))
                filtered_table_rows_count = count_rows[0][0]
            except QueryInterrupted:
                pass

        # facets support
        if not self.ds.config("allow_facet") and any(
            arg.startswith("_facet") for arg in request.args
        ):
            raise DatasetteError("_facet= is not allowed", status=400)

        # pylint: disable=no-member
        facet_classes = list(
            itertools.chain.from_iterable(pm.hook.register_facet_classes())
        )
        facet_results = {}
        facets_timed_out = []
        facet_instances = []
        for klass in facet_classes:
            facet_instances.append(
                klass(
                    self.ds,
                    request,
                    database,
                    sql=sql_no_limit,
                    params=params,
                    table=table,
                    metadata=table_metadata,
                    row_count=filtered_table_rows_count,
                )
            )

        for facet in facet_instances:
            (
                instance_facet_results,
                instance_facets_timed_out,
            ) = await facet.facet_results()
            facet_results.update(instance_facet_results)
            facets_timed_out.extend(instance_facets_timed_out)

        # Figure out columns and rows for the query
        columns = [r[0] for r in results.description]
        rows = list(results.rows)

        # Expand labeled columns if requested
        expanded_columns = []
        expandable_columns = await self.expandable_columns(database, table)
        columns_to_expand = None
        try:
            all_labels = value_as_boolean(special_args.get("_labels", ""))
        except ValueError:
            all_labels = default_labels
        # Check for explicit _label=
        if "_label" in request.args:
            columns_to_expand = request.args.getlist("_label")
        if columns_to_expand is None and all_labels:
            # expand all columns with foreign keys
            columns_to_expand = [fk["column"] for fk, _ in expandable_columns]

        if columns_to_expand:
            expanded_labels = {}
            for fk, _ in expandable_columns:
                column = fk["column"]
                if column not in columns_to_expand:
                    continue
                expanded_columns.append(column)
                # Gather the values
                column_index = columns.index(column)
                values = [row[column_index] for row in rows]
                # Expand them
                expanded_labels.update(
                    await self.ds.expand_foreign_keys(database, table, column, values)
                )
            if expanded_labels:
                # Rewrite the rows
                new_rows = []
                for row in rows:
                    new_row = CustomRow(columns)
                    for column in row.keys():
                        value = row[column]
                        if (column, value) in expanded_labels and value is not None:
                            new_row[column] = {
                                "value": value,
                                "label": expanded_labels[(column, value)],
                            }
                        else:
                            new_row[column] = value
                    new_rows.append(new_row)
                rows = new_rows

        # Pagination next link
        next_value = None
        next_url = None
        if len(rows) > page_size and page_size > 0:
            if is_view:
                next_value = int(_next or 0) + page_size
            else:
                next_value = path_from_row_pks(rows[-2], pks, use_rowid)
            # If there's a sort or sort_desc, add that value as a prefix
            if (sort or sort_desc) and not is_view:
                prefix = rows[-2][sort or sort_desc]
                if isinstance(prefix, dict) and "value" in prefix:
                    prefix = prefix["value"]
                if prefix is None:
                    prefix = "$null"
                else:
                    prefix = urllib.parse.quote_plus(str(prefix))
                next_value = "{},{}".format(prefix, next_value)
                added_args = {"_next": next_value}
                if sort:
                    added_args["_sort"] = sort
                else:
                    added_args["_sort_desc"] = sort_desc
            else:
                added_args = {"_next": next_value}
            next_url = self.ds.absolute_url(
                request, path_with_replaced_args(request, added_args)
            )
            rows = rows[:page_size]

        # Detect suggested facets
        suggested_facets = []

        if (
            self.ds.config("suggest_facets")
            and self.ds.config("allow_facet")
            and not _next
        ):
            for facet in facet_instances:
                suggested_facets.extend(await facet.suggest())

        # human_description_en combines filters AND search, if provided
        human_description_en = filters.human_description_en(
            extra=extra_human_descriptions
        )

        if sort or sort_desc:
            sorted_by = "sorted by {}{}".format(
                (sort or sort_desc), " descending" if sort_desc else ""
            )
            human_description_en = " ".join(
                [b for b in [human_description_en, sorted_by] if b]
            )

        async def extra_template():
            nonlocal sort

            display_columns, display_rows = await self.display_columns_and_rows(
                database,
                table,
                results.description,
                rows,
                link_column=not is_view,
                truncate_cells=self.ds.config("truncate_cells_html"),
            )
            metadata = (
                (self.ds.metadata("databases") or {})
                .get(database, {})
                .get("tables", {})
                .get(table, {})
            )
            self.ds.update_with_inherited_metadata(metadata)
            form_hidden_args = []
            for arg in ("_fts_table", "_fts_pk"):
                if arg in special_args:
                    form_hidden_args.append((arg, special_args[arg]))
            if request.args.get("_where"):
                for where_text in request.args.getlist("_where"):
                    form_hidden_args.append(("_where", where_text))

            # if no sort specified AND table has a single primary key,
            # set sort to that so arrow is displayed
            if not sort and not sort_desc:
                if 1 == len(pks):
                    sort = pks[0]
                elif use_rowid:
                    sort = "rowid"

            return {
                "supports_search": bool(fts_table),
                "search": search or "",
                "use_rowid": use_rowid,
                "filters": filters,
                "display_columns": display_columns,
                "filter_columns": columns,
                "display_rows": display_rows,
                "facets_timed_out": facets_timed_out,
                "sorted_facet_results": sorted(
                    facet_results.values(),
                    key=lambda f: (len(f["results"]), f["name"]),
                    reverse=True,
                ),
                "extra_wheres_for_ui": extra_wheres_for_ui,
                "form_hidden_args": form_hidden_args,
                "is_sortable": any(c["sortable"] for c in display_columns),
                "path_with_replaced_args": path_with_replaced_args,
                "path_with_removed_args": path_with_removed_args,
                "append_querystring": append_querystring,
                "request": request,
                "sort": sort,
                "sort_desc": sort_desc,
                "disable_sort": is_view,
                "custom_table_templates": [
                    "_table-{}-{}.html".format(
                        to_css_class(database), to_css_class(table)
                    ),
                    "_table-table-{}-{}.html".format(
                        to_css_class(database), to_css_class(table)
                    ),
                    "_table.html",
                ],
                "metadata": metadata,
                "view_definition": await db.get_view_definition(table),
                "table_definition": await db.get_table_definition(table),
            }

        return (
            {
                "database": database,
                "table": table,
                "is_view": is_view,
                "human_description_en": human_description_en,
                "rows": rows[:page_size],
                "truncated": results.truncated,
                "filtered_table_rows_count": filtered_table_rows_count,
                "expanded_columns": expanded_columns,
                "expandable_columns": expandable_columns,
                "columns": columns,
                "primary_keys": pks,
                "units": units,
                "query": {"sql": sql, "params": params},
                "facet_results": facet_results,
                "suggested_facets": suggested_facets,
                "next": next_value and str(next_value) or None,
                "next_url": next_url,
                "private": private,
                "allow_execute_sql": await self.ds.permission_allowed(
                    request.actor, "execute-sql", database, default=True
                ),
            },
            extra_template,
            (
                "table-{}-{}.html".format(to_css_class(database), to_css_class(table)),
                "table.html",
            ),
        )
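
The _next token above implements keyset pagination: instead of OFFSET, the next page is selected with a WHERE clause on the sort values and primary keys of the last row served. A simplified, standalone sketch of the compound-key tie-breaker that compound_keys_after_sql builds; this is an illustration only, and unlike the real helper it does not escape identifiers or offset the parameter numbering:

def keyset_where(pks, last_values):
    # For pks ["a", "b"] this produces:
    #   (a > :p0) or (a = :p0 and b > :p1)
    clauses = []
    for i in range(len(pks)):
        equals = " and ".join("{} = :p{}".format(pks[j], j) for j in range(i))
        greater = "{} > :p{}".format(pks[i], i)
        clauses.append("({})".format(" and ".join(c for c in (equals, greater) if c)))
    params = {"p{}".format(i): v for i, v in enumerate(last_values)}
    return " or ".join(clauses), params

# Page past the row whose (a, b) was (5, 10):
where, params = keyset_where(["a", "b"], [5, 10])
# where  == "(a > :p0) or (a = :p0 and b > :p1)"
# params == {"p0": 5, "p1": 10}
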
Example #4
    async def display_columns_and_rows(
        self, database, table, description, rows, link_column=False, truncate_cells=0
    ):
        "Returns columns, rows for specified table - including fancy foreign key treatment"
        db = self.ds.databases[database]
        table_metadata = self.ds.table_metadata(database, table)
        sortable_columns = await self.sortable_columns_for_table(database, table, True)
        columns = [
            {"name": r[0], "sortable": r[0] in sortable_columns} for r in description
        ]
        pks = await db.primary_keys(table)
        column_to_foreign_key_table = {
            fk["column"]: fk["other_table"]
            for fk in await db.foreign_keys_for_table(table)
        }

        cell_rows = []
        base_url = self.ds.config("base_url")
        for row in rows:
            cells = []
            # Unless we are a view, the first column is a link - either to the rowid
            # or to the simple or compound primary key
            if link_column:
                is_special_link_column = len(pks) != 1
                pk_path = path_from_row_pks(row, pks, not pks, False)
                cells.append(
                    {
                        "column": pks[0] if len(pks) == 1 else "Link",
                        "value_type": "pk",
                        "is_special_link_column": is_special_link_column,
                        "raw": pk_path,
                        "value": jinja2.Markup(
                            '<a href="{base_url}{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
                                base_url=base_url,
                                database=database,
                                table=urllib.parse.quote_plus(table),
                                flat_pks=str(jinja2.escape(pk_path)),
                                flat_pks_quoted=path_from_row_pks(row, pks, not pks),
                            )
                        ),
                    }
                )

            for value, column_dict in zip(row, columns):
                column = column_dict["name"]
                if link_column and len(pks) == 1 and column == pks[0]:
                    # If there's a simple primary key, don't repeat the value as it's
                    # already shown in the link column.
                    continue

                # First let the plugins have a go
                # pylint: disable=no-member
                plugin_display_value = pm.hook.render_cell(
                    value=value,
                    column=column,
                    table=table,
                    database=database,
                    datasette=self.ds,
                )
                if plugin_display_value is not None:
                    display_value = plugin_display_value
                elif isinstance(value, bytes):
                    display_value = jinja2.Markup(
                        "&lt;Binary&nbsp;data:&nbsp;{}&nbsp;byte{}&gt;".format(
                            len(value), "" if len(value) == 1 else "s"
                        )
                    )
                elif isinstance(value, dict):
                    # It's an expanded foreign key - display link to other row
                    label = value["label"]
                    value = value["value"]
                    # The table we link to depends on the column
                    other_table = column_to_foreign_key_table[column]
                    link_template = (
                        LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE
                    )
                    display_value = jinja2.Markup(
                        link_template.format(
                            database=database,
                            base_url=base_url,
                            table=urllib.parse.quote_plus(other_table),
                            link_id=urllib.parse.quote_plus(str(value)),
                            id=str(jinja2.escape(value)),
                            label=str(jinja2.escape(label)),
                        )
                    )
                elif value in ("", None):
                    display_value = jinja2.Markup("&nbsp;")
                elif is_url(str(value).strip()):
                    display_value = jinja2.Markup(
                        '<a href="{url}">{url}</a>'.format(
                            url=jinja2.escape(value.strip())
                        )
                    )
                elif column in table_metadata.get("units", {}) and value != "":
                    # Interpret units using pint
                    value = value * ureg(table_metadata["units"][column])
                    # Pint uses floating point which sometimes introduces errors in the compact
                    # representation, which we have to round off to avoid ugliness. In the vast
                    # majority of cases this rounding will be inconsequential. I hope.
                    value = round(value.to_compact(), 6)
                    display_value = jinja2.Markup(
                        "{:~P}".format(value).replace(" ", "&nbsp;")
                    )
                else:
                    display_value = str(value)
                    if truncate_cells and len(display_value) > truncate_cells:
                        display_value = display_value[:truncate_cells] + u"\u2026"

                cells.append(
                    {
                        "column": column,
                        "value": display_value,
                        "raw": value,
                        "value_type": "none"
                        if value is None
                        else str(type(value).__name__),
                    }
                )
            cell_rows.append(Row(cells))

        if link_column:
            # Add the link column header.
            # If it's a simple primary key, we have to remove and re-add that column name at
            # the beginning of the header row.
            if len(pks) == 1:
                columns = [col for col in columns if col["name"] != pks[0]]

            columns = [
                {"name": pks[0] if len(pks) == 1 else "Link", "sortable": len(pks) == 1}
            ] + columns
        return columns, cell_rows
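
The pm.hook.render_cell(...) call above gives plugins first refusal on every cell. A minimal sketch of such a plugin, using the same hook arguments; the "status" column is hypothetical, and returning None falls through to the default rendering:

import jinja2

from datasette import hookimpl

@hookimpl
def render_cell(value, column, table, database, datasette):
    # Hypothetical: render cells in a "status" column as an HTML badge
    if column != "status":
        return None  # let Datasette's default rendering handle it
    return jinja2.Markup(
        '<span class="badge">{}</span>'.format(jinja2.escape(str(value)))
    )
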
Example #5
    async def view_get(self, request, name, hash, **kwargs):
        try:
            as_json = kwargs.pop("as_json")
        except KeyError:
            as_json = False
        extra_template_data = {}
        start = time.time()
        status_code = 200
        templates = []
        try:
            response_or_template_contexts = await self.data(
                request, name, hash, **kwargs)
            if isinstance(response_or_template_contexts,
                          response.HTTPResponse):
                return response_or_template_contexts

            else:
                data, extra_template_data, templates = response_or_template_contexts
        except InterruptedError as e:
            raise DatasetteError(str(e), title="SQL Interrupted", status=400)
        except (sqlite3.OperationalError, InvalidSql) as e:
            raise DatasetteError(str(e), title="Invalid SQL", status=400)

        except DatasetteError:
            raise

        end = time.time()
        data["query_ms"] = (end - start) * 1000
        for key in ("source", "source_url", "license", "license_url"):
            value = self.ds.metadata.get(key)
            if value:
                data[key] = value
        if as_json:
            # Special case for .jsono extension - redirect to _shape=objects
            if as_json == ".jsono":
                return self.redirect(
                    request,
                    path_with_added_args(
                        request,
                        {"_shape": "objects"},
                        path=request.path.rsplit(".jsono", 1)[0] + ".json",
                    ),
                    forward_querystring=False,
                )

            # Deal with the _shape option
            shape = request.args.get("_shape", "arrays")
            if shape in ("objects", "object", "array"):
                columns = data.get("columns")
                rows = data.get("rows")
                if rows and columns:
                    data["rows"] = [dict(zip(columns, row)) for row in rows]
                if shape == "object":
                    error = None
                    if "primary_keys" not in data:
                        error = "_shape=object is only available on tables"
                    else:
                        pks = data["primary_keys"]
                        if not pks:
                            error = "_shape=object not available for tables with no primary keys"
                        else:
                            object_rows = {}
                            for row in data["rows"]:
                                pk_string = path_from_row_pks(
                                    row, pks, not pks)
                                object_rows[pk_string] = row
                            data = object_rows
                    if error:
                        data = {
                            "ok": False,
                            "error": error,
                            "database": name,
                            "database_hash": hash,
                        }
                elif shape == "array":
                    data = data["rows"]
            elif shape == "arrays":
                pass
            else:
                status_code = 400
                data = {
                    "ok": False,
                    "error": "Invalid _shape: {}".format(shape),
                    "status": 400,
                    "title": None,
                }
            headers = {}
            if self.ds.cors:
                headers["Access-Control-Allow-Origin"] = "*"
            r = response.HTTPResponse(
                json.dumps(data, cls=CustomJSONEncoder),
                status=status_code,
                content_type="application/json",
                headers=headers,
            )
        else:
            extras = {}
            if callable(extra_template_data):
                extras = extra_template_data()
                if asyncio.iscoroutine(extras):
                    extras = await extras
            else:
                extras = extra_template_data
            context = {
                **data,
                **extras,
                **{
                    "url_json": path_with_ext(request, ".json"),
                    "url_jsono": path_with_ext(request, ".jsono"),
                    "extra_css_urls": self.ds.extra_css_urls(),
                    "extra_js_urls": self.ds.extra_js_urls(),
                    "datasette_version": __version__,
                }
            }
            if "metadata" not in context:
                context["metadata"] = self.ds.metadata
            r = self.render(templates, **context)
            r.status = status_code
        # Set far-future cache expiry
        if self.ds.cache_headers:
            r.headers["Cache-Control"] = "max-age={}".format(365 * 24 * 60 *
                                                             60)
        return r
Example #6
    async def view_get(self, request, name, hash, **kwargs):
        # If ?_format= is provided, use that as the format
        _format = request.args.get("_format", None)
        if not _format:
            _format = (kwargs.pop("as_format", None) or "").lstrip(".")
        if "table_and_format" in kwargs:
            table, _ext_format = resolve_table_and_format(
                table_and_format=urllib.parse.unquote_plus(
                    kwargs["table_and_format"]),
                table_exists=lambda t: self.ds.table_exists(name, t))
            _format = _format or _ext_format
            kwargs["table"] = table
            del kwargs["table_and_format"]
        elif "table" in kwargs:
            kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"])

        if _format == "csv":
            return await self.as_csv(request, name, hash, **kwargs)

        if _format is None:
            # HTML views default to expanding all foreign key labels
            kwargs['default_labels'] = True

        extra_template_data = {}
        start = time.time()
        status_code = 200
        templates = []
        try:
            response_or_template_contexts = await self.data(
                request, name, hash, **kwargs)
            if isinstance(response_or_template_contexts,
                          response.HTTPResponse):
                return response_or_template_contexts

            else:
                data, extra_template_data, templates = response_or_template_contexts
        except InterruptedError as e:
            raise DatasetteError("""
                SQL query took too long. The time limit is controlled by the
                <a href="https://datasette.readthedocs.io/en/stable/config.html#sql-time-limit-ms">sql_time_limit_ms</a>
                configuration option.
            """,
                                 title="SQL Interrupted",
                                 status=400,
                                 messagge_is_html=True)
        except (sqlite3.OperationalError, InvalidSql) as e:
            raise DatasetteError(str(e), title="Invalid SQL", status=400)

        except DatasetteError:
            raise

        end = time.time()
        data["query_ms"] = (end - start) * 1000
        for key in ("source", "source_url", "license", "license_url"):
            value = self.ds.metadata.get(key)
            if value:
                data[key] = value
        if _format in ("json", "jsono"):
            # Special case for .jsono extension - redirect to _shape=objects
            if _format == "jsono":
                return self.redirect(
                    request,
                    path_with_added_args(
                        request,
                        {"_shape": "objects"},
                        path=request.path.rsplit(".jsono", 1)[0] + ".json",
                    ),
                    forward_querystring=False,
                )

            # Handle the _json= parameter which may modify data["rows"]
            json_cols = []
            if "_json" in request.args:
                json_cols = request.args["_json"]
            if json_cols and "rows" in data and "columns" in data:
                data["rows"] = convert_specific_columns_to_json(
                    data["rows"],
                    data["columns"],
                    json_cols,
                )

            # unless _json_infinity=1 requested, replace infinity with None
            if "rows" in data and not value_as_boolean(
                    request.args.get("_json_infinity", "0")):
                data["rows"] = [remove_infinites(row) for row in data["rows"]]

            # Deal with the _shape option
            shape = request.args.get("_shape", "arrays")
            if shape == "arrayfirst":
                data = [row[0] for row in data["rows"]]
            elif shape in ("objects", "object", "array"):
                columns = data.get("columns")
                rows = data.get("rows")
                if rows and columns:
                    data["rows"] = [dict(zip(columns, row)) for row in rows]
                if shape == "object":
                    error = None
                    if "primary_keys" not in data:
                        error = "_shape=object is only available on tables"
                    else:
                        pks = data["primary_keys"]
                        if not pks:
                            error = "_shape=object not available for tables with no primary keys"
                        else:
                            object_rows = {}
                            for row in data["rows"]:
                                pk_string = path_from_row_pks(
                                    row, pks, not pks)
                                object_rows[pk_string] = row
                            data = object_rows
                    if error:
                        data = {
                            "ok": False,
                            "error": error,
                            "database": name,
                            "database_hash": hash,
                        }
                elif shape == "array":
                    data = data["rows"]
            elif shape == "arrays":
                pass
            else:
                status_code = 400
                data = {
                    "ok": False,
                    "error": "Invalid _shape: {}".format(shape),
                    "status": 400,
                    "title": None,
                }
            headers = {}
            if self.ds.cors:
                headers["Access-Control-Allow-Origin"] = "*"
            r = response.HTTPResponse(
                json.dumps(data, cls=CustomJSONEncoder),
                status=status_code,
                content_type="application/json",
                headers=headers,
            )
        else:
            extras = {}
            if callable(extra_template_data):
                extras = extra_template_data()
                if asyncio.iscoroutine(extras):
                    extras = await extras
            else:
                extras = extra_template_data
            url_labels_extra = {}
            if data.get("expandable_columns"):
                url_labels_extra = {"_labels": "on"}
            url_csv_args = {"_size": "max", **url_labels_extra}
            url_csv = path_with_format(request, "csv", url_csv_args)
            url_csv_path = url_csv.split('?')[0]
            context = {
                **data,
                **extras,
                **{
                    "url_json": path_with_format(request, "json", {**url_labels_extra}),
                    "url_csv": url_csv,
                    "url_csv_path": url_csv_path,
                    "url_csv_args": url_csv_args,
                    "extra_css_urls": self.ds.extra_css_urls(),
                    "extra_js_urls": self.ds.extra_js_urls(),
                    "datasette_version": __version__,
                    "config": self.ds.config_dict(),
                }
            }
            if "metadata" not in context:
                context["metadata"] = self.ds.metadata
            r = self.render(templates, **context)
            r.status = status_code
        # Set far-future cache expiry
        if self.ds.cache_headers:
            ttl = request.args.get("_ttl", None)
            if ttl is None or not ttl.isdigit():
                ttl = self.ds.config("default_cache_ttl")
            else:
                ttl = int(ttl)
            if ttl == 0:
                ttl_header = 'no-cache'
            else:
                ttl_header = 'max-age={}'.format(ttl)
            r.headers["Cache-Control"] = ttl_header
        r.headers["Referrer-Policy"] = "no-referrer"
        return r
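
The _ttl handling at the end of this version can be read as a small pure function from the querystring value and the default_cache_ttl setting to a Cache-Control header. Restated as a standalone helper for clarity (not a separate function in the original):

def cache_control_header(ttl_arg, default_ttl):
    # Non-numeric or missing _ttl values fall back to the configured
    # default; a ttl of 0 disables caching entirely
    if ttl_arg is None or not ttl_arg.isdigit():
        ttl = default_ttl
    else:
        ttl = int(ttl_arg)
    return "no-cache" if ttl == 0 else "max-age={}".format(ttl)

assert cache_control_header(None, 5) == "max-age=5"
assert cache_control_header("abc", 5) == "max-age=5"
assert cache_control_header("0", 5) == "no-cache"
assert cache_control_header("120", 5) == "max-age=120"
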
Example #7
    async def view_get(self, request, name, hash, **kwargs):
        try:
            as_json = kwargs.pop("as_json")
        except KeyError:
            as_json = False
        extra_template_data = {}
        start = time.time()
        status_code = 200
        templates = []
        try:
            response_or_template_contexts = await self.data(
                request, name, hash, **kwargs
            )
            if isinstance(response_or_template_contexts, response.HTTPResponse):
                return response_or_template_contexts

            else:
                data, extra_template_data, templates = response_or_template_contexts
        except InterruptedError as e:
            raise DatasetteError("""
                SQL query took too long. The time limit is controlled by the
                <a href="https://datasette.readthedocs.io/en/stable/config.html#sql-time-limit-ms">sql_time_limit_ms</a>
                configuration option.
            """, title="SQL Interrupted", status=400, messagge_is_html=True)
        except (sqlite3.OperationalError, InvalidSql) as e:
            raise DatasetteError(str(e), title="Invalid SQL", status=400)

        except DatasetteError:
            raise

        end = time.time()
        data["query_ms"] = (end - start) * 1000
        for key in ("source", "source_url", "license", "license_url"):
            value = self.ds.metadata.get(key)
            if value:
                data[key] = value
        if as_json:
            # Special case for .jsono extension - redirect to _shape=objects
            if as_json == ".jsono":
                return self.redirect(
                    request,
                    path_with_added_args(
                        request,
                        {"_shape": "objects"},
                        path=request.path.rsplit(".jsono", 1)[0] + ".json",
                    ),
                    forward_querystring=False,
                )

            # Handle the _json= parameter which may modify data["rows"]
            json_cols = []
            if "_json" in request.args:
                json_cols = request.args["_json"]
            if json_cols and "rows" in data and "columns" in data:
                data["rows"] = convert_specific_columns_to_json(
                    data["rows"], data["columns"], json_cols,
                )

            # Deal with the _shape option
            shape = request.args.get("_shape", "arrays")
            if shape == "arrayfirst":
                data = [row[0] for row in data["rows"]]
            elif shape in ("objects", "object", "array"):
                columns = data.get("columns")
                rows = data.get("rows")
                if rows and columns:
                    data["rows"] = [dict(zip(columns, row)) for row in rows]
                if shape == "object":
                    error = None
                    if "primary_keys" not in data:
                        error = "_shape=object is only available on tables"
                    else:
                        pks = data["primary_keys"]
                        if not pks:
                            error = "_shape=object not available for tables with no primary keys"
                        else:
                            object_rows = {}
                            for row in data["rows"]:
                                pk_string = path_from_row_pks(row, pks, not pks)
                                object_rows[pk_string] = row
                            data = object_rows
                    if error:
                        data = {
                            "ok": False,
                            "error": error,
                            "database": name,
                            "database_hash": hash,
                        }
                elif shape == "array":
                    data = data["rows"]
            elif shape == "arrays":
                pass
            else:
                status_code = 400
                data = {
                    "ok": False,
                    "error": "Invalid _shape: {}".format(shape),
                    "status": 400,
                    "title": None,
                }
            headers = {}
            if self.ds.cors:
                headers["Access-Control-Allow-Origin"] = "*"
            r = response.HTTPResponse(
                json.dumps(data, cls=CustomJSONEncoder),
                status=status_code,
                content_type="application/json",
                headers=headers,
            )
        else:
            extras = {}
            if callable(extra_template_data):
                extras = extra_template_data()
                if asyncio.iscoroutine(extras):
                    extras = await extras
            else:
                extras = extra_template_data
            context = {
                **data,
                **extras,
                **{
                    "url_json": path_with_ext(request, ".json"),
                    "url_jsono": path_with_ext(request, ".jsono"),
                    "extra_css_urls": self.ds.extra_css_urls(),
                    "extra_js_urls": self.ds.extra_js_urls(),
                    "datasette_version": __version__,
                }
            }
            if "metadata" not in context:
                context["metadata"] = self.ds.metadata
            r = self.render(templates, **context)
            r.status = status_code
        # Set far-future cache expiry
        if self.ds.cache_headers:
            ttl = request.args.get("_ttl", None)
            if ttl is None or not ttl.isdigit():
                ttl = self.ds.config["default_cache_ttl"]
            else:
                ttl = int(ttl)
            if ttl == 0:
                ttl_header = 'no-cache'
            else:
                ttl_header = 'max-age={}'.format(ttl)
            r.headers["Cache-Control"] = ttl_header
        r.headers["Referrer-Policy"] = "no-referrer"
        return r
Example #8
async def stream_fn(r):
    nonlocal data
    writer = csv.writer(LimitedWriter(r, self.ds.setting("max_csv_mb")))
    first = True
    next = None
    while first or (next and stream):
        try:
            if next:
                kwargs["_next"] = next
            if not first:
                data, _, _ = await self.data(request, database, hash, **kwargs)
            if first:
                await writer.writerow(headings)
                first = False
            next = data.get("next")
            for row in data["rows"]:
                if any(isinstance(cell, bytes) for cell in row):
                    new_row = []
                    for column, cell in zip(headings, row):
                        if isinstance(cell, bytes):
                            # If this is a table page, use .urls.row_blob()
                            if data.get("table"):
                                pks = data.get("primary_keys") or []
                                cell = self.ds.absolute_url(
                                    request,
                                    self.ds.urls.row_blob(
                                        database,
                                        data["table"],
                                        path_from_row_pks(row, pks, not pks),
                                        column,
                                    ),
                                )
                            else:
                                # Otherwise generate URL for this query
                                cell = self.ds.absolute_url(
                                    request,
                                    path_with_format(
                                        request=request,
                                        format="blob",
                                        extra_qs={
                                            "_blob_column": column,
                                            "_blob_hash": hashlib.sha256(cell).hexdigest(),
                                        },
                                        replace_format="csv",
                                    ),
                                )
                        new_row.append(cell)
                    row = new_row
                if not expanded_columns:
                    # Simple path
                    await writer.writerow(row)
                else:
                    # Look for {"value": ..., "label": ...} dicts and expand
                    new_row = []
                    for heading, cell in zip(data["columns"], row):
                        if heading in expanded_columns:
                            if cell is None:
                                new_row.extend(("", ""))
                            else:
                                assert isinstance(cell, dict)
                                new_row.append(cell["value"])
                                new_row.append(cell["label"])
                        else:
                            new_row.append(cell)
                    await writer.writerow(new_row)
        except Exception as e:
            # Stream the error into the body: headers have already been
            # sent, so we can no longer switch to an error status code
            await r.write(str(e))
            return
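
Example #8 streams CSV and, when a table page contains binary cells, replaces the raw bytes with a blob URL keyed on the row's primary-key path. A quick sketch of what path_from_row_pks produces for that path - the sample rows are invented, while the (row, pks, use_rowid) signature is the one used throughout these examples:

from datasette.utils import path_from_row_pks

# Single primary key: the path is just the (encoded) value
assert path_from_row_pks({"id": 5}, ["id"], use_rowid=False) == "5"

# Compound primary keys are joined with commas, each component encoded;
# the exact encoding (percent vs tilde) depends on the Datasette version
print(path_from_row_pks({"country": "us", "city": "new york"}, ["country", "city"], False))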
Example #9
async def display_columns_and_rows(
    datasette,
    database_name,
    table_name,
    description,
    rows,
    link_column=False,
    truncate_cells=0,
    sortable_columns=None,
):
    """Returns columns, rows for specified table - including fancy foreign key treatment"""
    sortable_columns = sortable_columns or set()
    db = datasette.databases[database_name]
    table_metadata = datasette.table_metadata(database_name, table_name)
    column_descriptions = table_metadata.get("columns") or {}
    column_details = {
        col.name: col for col in await db.table_column_details(table_name)
    }
    pks = await db.primary_keys(table_name)
    pks_for_display = pks
    if not pks_for_display:
        pks_for_display = ["rowid"]

    columns = []
    for r in description:
        if r[0] == "rowid" and "rowid" not in column_details:
            type_ = "integer"
            notnull = 0
        else:
            type_ = column_details[r[0]].type
            notnull = column_details[r[0]].notnull
        columns.append({
            "name": r[0],
            "sortable": r[0] in sortable_columns,
            "is_pk": r[0] in pks_for_display,
            "type": type_,
            "notnull": notnull,
            "description": column_descriptions.get(r[0]),
        })

    column_to_foreign_key_table = {
        fk["column"]: fk["other_table"]
        for fk in await db.foreign_keys_for_table(table_name)
    }

    cell_rows = []
    base_url = datasette.setting("base_url")
    for row in rows:
        cells = []
        # Unless we are a view, the first column is a link - either to the rowid
        # or to the simple or compound primary key
        if link_column:
            is_special_link_column = len(pks) != 1
            pk_path = path_from_row_pks(row, pks, not pks, False)
            cells.append({
                "column": pks[0] if len(pks) == 1 else "Link",
                "value_type": "pk",
                "is_special_link_column": is_special_link_column,
                "raw": pk_path,
                "value": markupsafe.Markup(
                    '<a href="{table_path}/{flat_pks_quoted}">{flat_pks}</a>'.format(
                        table_path=datasette.urls.table(database_name, table_name),
                        flat_pks=str(markupsafe.escape(pk_path)),
                        flat_pks_quoted=path_from_row_pks(row, pks, not pks),
                    )
                ),
            })

        for value, column_dict in zip(row, columns):
            column = column_dict["name"]
            if link_column and len(pks) == 1 and column == pks[0]:
                # If there's a simple primary key, don't repeat the value as it's
                # already shown in the link column.
                continue

            # First let the plugins have a go
            # pylint: disable=no-member
            plugin_display_value = None
            for candidate in pm.hook.render_cell(
                    value=value,
                    column=column,
                    table=table_name,
                    database=database_name,
                    datasette=datasette,
            ):
                candidate = await await_me_maybe(candidate)
                if candidate is not None:
                    plugin_display_value = candidate
                    break
            if plugin_display_value:
                display_value = plugin_display_value
            elif isinstance(value, bytes):
                formatted = format_bytes(len(value))
                blob_url = datasette.urls.row_blob(
                    database_name,
                    table_name,
                    path_from_row_pks(row, pks, not pks),
                    column,
                )
                display_value = markupsafe.Markup(
                    '<a class="blob-download" href="{}"{}>&lt;Binary:&nbsp;{:,}&nbsp;byte{}&gt;</a>'.format(
                        blob_url,
                        ' title="{}"'.format(formatted) if "bytes" not in formatted else "",
                        len(value),
                        "" if len(value) == 1 else "s",
                    )
                )
            elif isinstance(value, dict):
                # It's an expanded foreign key - display link to other row
                label = value["label"]
                value = value["value"]
                # The table we link to depends on the column
                other_table = column_to_foreign_key_table[column]
                link_template = LINK_WITH_LABEL if (
                    label != value) else LINK_WITH_VALUE
                display_value = markupsafe.Markup(
                    link_template.format(
                        database=database_name,
                        base_url=base_url,
                        table=tilde_encode(other_table),
                        link_id=tilde_encode(str(value)),
                        id=str(markupsafe.escape(value)),
                        label=str(markupsafe.escape(label)) or "-",
                    ))
            elif value in ("", None):
                display_value = markupsafe.Markup("&nbsp;")
            elif is_url(str(value).strip()):
                display_value = markupsafe.Markup(
                    '<a href="{url}">{url}</a>'.format(
                        url=markupsafe.escape(value.strip())))
            elif column in table_metadata.get("units", {}) and value != "":
                # Interpret units using pint
                value = value * ureg(table_metadata["units"][column])
                # Pint uses floating point which sometimes introduces errors in the compact
                # representation, which we have to round off to avoid ugliness. In the vast
                # majority of cases this rounding will be inconsequential. I hope.
                value = round(value.to_compact(), 6)
                display_value = markupsafe.Markup(f"{value:~P}".replace(
                    " ", "&nbsp;"))
            else:
                display_value = str(value)
                if truncate_cells and len(display_value) > truncate_cells:
                    display_value = display_value[:truncate_cells] + "\u2026"

            cells.append({
                "column": column,
                "value": display_value,
                "raw": value,
                "value_type": "none" if value is None else str(type(value).__name__),
            })
        cell_rows.append(Row(cells))

    if link_column:
        # Add the link column header.
        # If it's a simple primary key, we have to remove and re-add that column name at
        # the beginning of the header row.
        first_column = None
        if len(pks) == 1:
            columns = [col for col in columns if col["name"] != pks[0]]
            first_column = {
                "name": pks[0],
                "sortable": len(pks) == 1,
                "is_pk": True,
                "type": column_details[pks[0]].type,
                "notnull": column_details[pks[0]].notnull,
            }
        else:
            first_column = {
                "name": "Link",
                "sortable": False,
                "is_pk": False,
                "type": "",
                "notnull": 0,
            }
        columns = [first_column] + columns
    return columns, cell_rows
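
Example #9 gives plugins first refusal on every cell via the render_cell hook before falling back to built-in handling of bytes, expanded foreign keys, URLs and units. A minimal plugin sketch against that hook - the hook name and parameters mirror the call in the loop above, while the "status" column and the uppercasing rule are purely illustrative:

from datasette import hookimpl

@hookimpl
def render_cell(value, column, table, database, datasette):
    # Illustrative rule: render a hypothetical "status" column in upper case
    if column == "status" and isinstance(value, str):
        return value.upper()
    # Returning None hands the cell back to the default rendering
    return None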
Example #10
    async def _data_traced(
        self,
        request,
        default_labels=False,
        _next=None,
        _size=None,
    ):
        database_route = tilde_decode(request.url_vars["database"])
        table_name = tilde_decode(request.url_vars["table"])
        try:
            db = self.ds.get_database(route=database_route)
        except KeyError:
            raise NotFound("Database not found: {}".format(database_route))
        database_name = db.name

        # For performance profiling purposes, ?_noparallel=1 turns off asyncio.gather
        async def _gather_parallel(*args):
            return await asyncio.gather(*args)

        async def _gather_sequential(*args):
            results = []
            for fn in args:
                results.append(await fn)
            return results

        gather = (_gather_sequential
                  if request.args.get("_noparallel") else _gather_parallel)

        # If this is a canned query, not a table, then dispatch to QueryView instead
        canned_query = await self.ds.get_canned_query(database_name,
                                                      table_name,
                                                      request.actor)
        if canned_query:
            return await QueryView(self.ds).data(
                request,
                canned_query["sql"],
                metadata=canned_query,
                editable=False,
                canned_query=table_name,
                named_parameters=canned_query.get("params"),
                write=bool(canned_query.get("write")),
            )

        is_view, table_exists = map(
            bool,
            await gather(db.get_view_definition(table_name),
                         db.table_exists(table_name)),
        )

        # If table or view not found, return 404
        if not is_view and not table_exists:
            raise NotFound(f"Table not found: {table_name}")

        # Ensure user has permission to view this table
        await self.ds.ensure_permissions(
            request.actor,
            [
                ("view-table", (database_name, table_name)),
                ("view-database", database_name),
                "view-instance",
            ],
        )

        private = not await self.ds.permission_allowed(
            None, "view-table", (database_name, table_name), default=True)

        # Handle ?_filter_column and redirect, if present
        redirect_params = filters_should_redirect(request.args)
        if redirect_params:
            return self.redirect(
                request,
                path_with_added_args(request, redirect_params),
                forward_querystring=False,
            )

        # If ?_sort_by_desc=on (from checkbox) redirect to _sort_desc=(_sort)
        if "_sort_by_desc" in request.args:
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {
                        "_sort_desc": request.args.get("_sort"),
                        "_sort_by_desc": None,
                        "_sort": None,
                    },
                ),
                forward_querystring=False,
            )

        # Introspect columns and primary keys for table
        pks = await db.primary_keys(table_name)
        table_columns = await db.table_columns(table_name)

        # Take ?_col= and ?_nocol= into account
        specified_columns = await self.columns_to_select(
            table_columns, pks, request)
        select_specified_columns = ", ".join(
            escape_sqlite(t) for t in specified_columns)
        select_all_columns = ", ".join(escape_sqlite(t) for t in table_columns)

        # rowid tables (no specified primary key) need a different SELECT
        use_rowid = not pks and not is_view
        if use_rowid:
            select_specified_columns = f"rowid, {select_specified_columns}"
            select_all_columns = f"rowid, {select_all_columns}"
            order_by = "rowid"
            order_by_pks = "rowid"
        else:
            order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
            order_by = order_by_pks

        if is_view:
            order_by = ""

        nocount = request.args.get("_nocount")
        nofacet = request.args.get("_nofacet")
        nosuggest = request.args.get("_nosuggest")

        if request.args.get("_shape") in ("array", "object"):
            nocount = True
            nofacet = True

        table_metadata = self.ds.table_metadata(database_name, table_name)
        units = table_metadata.get("units", {})

        # Arguments that start with _ and don't contain a __ are
        # special - things like ?_search= - and should not be
        # treated as filters.
        filter_args = []
        for key in request.args:
            if not (key.startswith("_") and "__" not in key):
                for v in request.args.getlist(key):
                    filter_args.append((key, v))

        # Build where clauses from query string arguments
        filters = Filters(sorted(filter_args), units, ureg)
        where_clauses, params = filters.build_where_clauses(table_name)

        # Execute filters_from_request plugin hooks - including the default
        # ones that live in datasette/filters.py
        extra_context_from_filters = {}
        extra_human_descriptions = []

        for hook in pm.hook.filters_from_request(
                request=request,
                table=table_name,
                database=database_name,
                datasette=self.ds,
        ):
            filter_arguments = await await_me_maybe(hook)
            if filter_arguments:
                where_clauses.extend(filter_arguments.where_clauses)
                params.update(filter_arguments.params)
                extra_human_descriptions.extend(
                    filter_arguments.human_descriptions)
                extra_context_from_filters.update(
                    filter_arguments.extra_context)

        # Deal with custom sort orders
        sortable_columns = await self.sortable_columns_for_table(
            database_name, table_name, use_rowid)
        sort = request.args.get("_sort")
        sort_desc = request.args.get("_sort_desc")

        if not sort and not sort_desc:
            sort = table_metadata.get("sort")
            sort_desc = table_metadata.get("sort_desc")

        if sort and sort_desc:
            raise DatasetteError(
                "Cannot use _sort and _sort_desc at the same time")

        if sort:
            if sort not in sortable_columns:
                raise DatasetteError(f"Cannot sort table by {sort}")

            order_by = escape_sqlite(sort)

        if sort_desc:
            if sort_desc not in sortable_columns:
                raise DatasetteError(f"Cannot sort table by {sort_desc}")

            order_by = f"{escape_sqlite(sort_desc)} desc"

        from_sql = "from {table_name} {where}".format(
            table_name=escape_sqlite(table_name),
            where=("where {} ".format(" and ".join(where_clauses)))
            if where_clauses else "",
        )
        # Copy of params so we can mutate them later:
        from_sql_params = dict(**params)

        count_sql = f"select count(*) {from_sql}"

        # Handle pagination driven by ?_next=
        _next = _next or request.args.get("_next")
        offset = ""
        if _next:
            sort_value = None
            if is_view:
                # _next is an offset
                offset = f" offset {int(_next)}"
            else:
                components = urlsafe_components(_next)
                # If a sort order is applied and there are multiple components,
                # the first of these is the sort value
                if (sort or sort_desc) and (len(components) > 1):
                    sort_value = components[0]
                    # Special case for if non-urlencoded first token was $null
                    if _next.split(",")[0] == "$null":
                        sort_value = None
                    components = components[1:]

                # Figure out the SQL for next-based-on-primary-key first
                next_by_pk_clauses = []
                if use_rowid:
                    next_by_pk_clauses.append(f"rowid > :p{len(params)}")
                    params[f"p{len(params)}"] = components[0]
                else:
                    # Apply the tie-breaker based on primary keys
                    if len(components) == len(pks):
                        param_len = len(params)
                        next_by_pk_clauses.append(
                            compound_keys_after_sql(pks, param_len))
                        for i, pk_value in enumerate(components):
                            params[f"p{param_len + i}"] = pk_value

                # Now add the sort SQL, which may incorporate next_by_pk_clauses
                if sort or sort_desc:
                    if sort_value is None:
                        if sort_desc:
                            # Just items where column is null ordered by pk
                            where_clauses.append(
                                "({column} is null and {next_clauses})".format(
                                    column=escape_sqlite(sort_desc),
                                    next_clauses=" and ".join(
                                        next_by_pk_clauses),
                                ))
                        else:
                            where_clauses.append(
                                "({column} is not null or ({column} is null and {next_clauses}))"
                                .format(
                                    column=escape_sqlite(sort),
                                    next_clauses=" and ".join(
                                        next_by_pk_clauses),
                                ))
                    else:
                        where_clauses.append(
                            "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))"
                            .format(
                                column=escape_sqlite(sort or sort_desc),
                                op=">" if sort else "<",
                                p=len(params),
                                extra_desc_only=""
                                if sort else " or {column2} is null".format(
                                    column2=escape_sqlite(sort or sort_desc)),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            ))
                        params[f"p{len(params)}"] = sort_value
                    order_by = f"{order_by}, {order_by_pks}"
                else:
                    where_clauses.extend(next_by_pk_clauses)

        where_clause = ""
        if where_clauses:
            where_clause = f"where {' and '.join(where_clauses)} "

        if order_by:
            order_by = f"order by {order_by}"

        extra_args = {}
        # Handle ?_size=500
        page_size = _size or request.args.get("_size") or table_metadata.get(
            "size")
        if page_size:
            if page_size == "max":
                page_size = self.ds.max_returned_rows
            try:
                page_size = int(page_size)
                if page_size < 0:
                    raise ValueError

            except ValueError:
                raise BadRequest("_size must be a positive integer")

            if page_size > self.ds.max_returned_rows:
                raise BadRequest(
                    f"_size must be <= {self.ds.max_returned_rows}")

            extra_args["page_size"] = page_size
        else:
            page_size = self.ds.page_size

        # Facets are calculated against SQL without order by or limit
        sql_no_order_no_limit = (
            "select {select_all_columns} from {table_name} {where}".format(
                select_all_columns=select_all_columns,
                table_name=escape_sqlite(table_name),
                where=where_clause,
            ))

        # This is the SQL that populates the main table on the page
        sql = "select {select_specified_columns} from {table_name} {where}{order_by} limit {page_size}{offset}".format(
            select_specified_columns=select_specified_columns,
            table_name=escape_sqlite(table_name),
            where=where_clause,
            order_by=order_by,
            page_size=page_size + 1,
            offset=offset,
        )

        if request.args.get("_timelimit"):
            extra_args["custom_time_limit"] = int(
                request.args.get("_timelimit"))

        # Execute the main query!
        results = await db.execute(sql, params, truncate=True, **extra_args)

        # Calculate the total count for this query
        filtered_table_rows_count = None
        if (not db.is_mutable and self.ds.inspect_data
                and count_sql == f"select count(*) from {table_name} "):
            # We can use a previously cached table row count
            try:
                filtered_table_rows_count = (
                    self.ds.inspect_data[database_name]["tables"][table_name]["count"]
                )
            except KeyError:
                pass

        # Otherwise run a select count(*) ...
        if count_sql and filtered_table_rows_count is None and not nocount:
            try:
                count_rows = list(await db.execute(count_sql, from_sql_params))
                filtered_table_rows_count = count_rows[0][0]
            except QueryInterrupted:
                pass

        # Faceting
        if not self.ds.setting("allow_facet") and any(
                arg.startswith("_facet") for arg in request.args):
            raise BadRequest("_facet= is not allowed")

        # pylint: disable=no-member
        facet_classes = list(
            itertools.chain.from_iterable(pm.hook.register_facet_classes()))
        facet_results = {}
        facets_timed_out = []
        facet_instances = []
        for klass in facet_classes:
            facet_instances.append(
                klass(
                    self.ds,
                    request,
                    database_name,
                    sql=sql_no_order_no_limit,
                    params=params,
                    table=table_name,
                    metadata=table_metadata,
                    row_count=filtered_table_rows_count,
                ))

        async def execute_facets():
            if not nofacet:
                # Run them in parallel
                facet_awaitables = [
                    facet.facet_results() for facet in facet_instances
                ]
                facet_awaitable_results = await gather(*facet_awaitables)
                for (
                        instance_facet_results,
                        instance_facets_timed_out,
                ) in facet_awaitable_results:
                    for facet_info in instance_facet_results:
                        base_key = facet_info["name"]
                        key = base_key
                        i = 1
                        while key in facet_results:
                            i += 1
                            key = f"{base_key}_{i}"
                        facet_results[key] = facet_info
                    facets_timed_out.extend(instance_facets_timed_out)

        suggested_facets = []

        async def execute_suggested_facets():
            # Calculate suggested facets
            if (self.ds.setting("suggest_facets")
                    and self.ds.setting("allow_facet") and not _next
                    and not nofacet and not nosuggest):
                # Run them in parallel
                facet_suggest_awaitables = [
                    facet.suggest() for facet in facet_instances
                ]
                for suggest_result in await gather(*facet_suggest_awaitables):
                    suggested_facets.extend(suggest_result)

        await gather(execute_facets(), execute_suggested_facets())

        # Figure out columns and rows for the query
        columns = [r[0] for r in results.description]
        rows = list(results.rows)

        # Expand labeled columns if requested
        expanded_columns = []
        expandable_columns = await self.expandable_columns(
            database_name, table_name)
        columns_to_expand = None
        try:
            all_labels = value_as_boolean(request.args.get("_labels", ""))
        except ValueError:
            all_labels = default_labels
        # Check for explicit _label=
        if "_label" in request.args:
            columns_to_expand = request.args.getlist("_label")
        if columns_to_expand is None and all_labels:
            # expand all columns with foreign keys
            columns_to_expand = [fk["column"] for fk, _ in expandable_columns]

        if columns_to_expand:
            expanded_labels = {}
            for fk, _ in expandable_columns:
                column = fk["column"]
                if column not in columns_to_expand:
                    continue
                if column not in columns:
                    continue
                expanded_columns.append(column)
                # Gather the values
                column_index = columns.index(column)
                values = [row[column_index] for row in rows]
                # Expand them
                expanded_labels.update(await self.ds.expand_foreign_keys(
                    database_name, table_name, column, values))
            if expanded_labels:
                # Rewrite the rows
                new_rows = []
                for row in rows:
                    new_row = CustomRow(columns)
                    for column in row.keys():
                        value = row[column]
                        if (column, value) in expanded_labels and value is not None:
                            new_row[column] = {
                                "value": value,
                                "label": expanded_labels[(column, value)],
                            }
                        else:
                            new_row[column] = value
                    new_rows.append(new_row)
                rows = new_rows

        # Pagination next link
        next_value = None
        next_url = None
        if 0 < page_size < len(rows):
            if is_view:
                next_value = int(_next or 0) + page_size
            else:
                next_value = path_from_row_pks(rows[-2], pks, use_rowid)
            # If there's a sort or sort_desc, add that value as a prefix
            if (sort or sort_desc) and not is_view:
                prefix = rows[-2][sort or sort_desc]
                if isinstance(prefix, dict) and "value" in prefix:
                    prefix = prefix["value"]
                if prefix is None:
                    prefix = "$null"
                else:
                    prefix = tilde_encode(str(prefix))
                next_value = f"{prefix},{next_value}"
                added_args = {"_next": next_value}
                if sort:
                    added_args["_sort"] = sort
                else:
                    added_args["_sort_desc"] = sort_desc
            else:
                added_args = {"_next": next_value}
            next_url = self.ds.absolute_url(
                request,
                self.ds.urls.path(path_with_replaced_args(request, added_args)),
            )
            rows = rows[:page_size]

        # human_description_en combines filters AND search, if provided
        human_description_en = filters.human_description_en(
            extra=extra_human_descriptions)

        if sort or sort_desc:
            sorted_by = "sorted by {}{}".format(
                (sort or sort_desc), " descending" if sort_desc else "")
            human_description_en = " ".join(
                [b for b in [human_description_en, sorted_by] if b])

        async def extra_template():
            nonlocal sort

            display_columns, display_rows = await display_columns_and_rows(
                self.ds,
                database_name,
                table_name,
                results.description,
                rows,
                link_column=not is_view,
                truncate_cells=self.ds.setting("truncate_cells_html"),
                sortable_columns=await self.sortable_columns_for_table(
                    database_name, table_name, use_rowid=True
                ),
            )
            metadata = (
                (self.ds.metadata("databases") or {})
                .get(database_name, {})
                .get("tables", {})
                .get(table_name, {})
            )
            self.ds.update_with_inherited_metadata(metadata)

            form_hidden_args = []
            for key in request.args:
                if (key.startswith("_")
                        and key not in ("_sort", "_search", "_next")
                        and "__" not in key):
                    for value in request.args.getlist(key):
                        form_hidden_args.append((key, value))

            # if no sort specified AND table has a single primary key,
            # set sort to that so arrow is displayed
            if not sort and not sort_desc:
                if 1 == len(pks):
                    sort = pks[0]
                elif use_rowid:
                    sort = "rowid"

            async def table_actions():
                links = []
                for hook in pm.hook.table_actions(
                        datasette=self.ds,
                        table=table_name,
                        database=database_name,
                        actor=request.actor,
                        request=request,
                ):
                    extra_links = await await_me_maybe(hook)
                    if extra_links:
                        links.extend(extra_links)
                return links

            # filter_columns combine the columns we know are available
            # in the table with any additional columns (such as rowid)
            # which are available in the query
            filter_columns = list(columns) + [
                table_column for table_column in table_columns
                if table_column not in columns
            ]
            d = {
                "table_actions": table_actions,
                "use_rowid": use_rowid,
                "filters": filters,
                "display_columns": display_columns,
                "filter_columns": filter_columns,
                "display_rows": display_rows,
                "facets_timed_out": facets_timed_out,
                "sorted_facet_results": sorted(
                    facet_results.values(),
                    key=lambda f: (len(f["results"]), f["name"]),
                    reverse=True,
                ),
                "form_hidden_args": form_hidden_args,
                "is_sortable": any(c["sortable"] for c in display_columns),
                "fix_path": self.ds.urls.path,
                "path_with_replaced_args": path_with_replaced_args,
                "path_with_removed_args": path_with_removed_args,
                "append_querystring": append_querystring,
                "request": request,
                "sort": sort,
                "sort_desc": sort_desc,
                "disable_sort": is_view,
                "custom_table_templates": [
                    f"_table-{to_css_class(database_name)}-{to_css_class(table_name)}.html",
                    f"_table-table-{to_css_class(database_name)}-{to_css_class(table_name)}.html",
                    "_table.html",
                ],
                "metadata": metadata,
                "view_definition": await db.get_view_definition(table_name),
                "table_definition": await db.get_table_definition(table_name),
                "datasette_allow_facet": "true" if self.ds.setting("allow_facet") else "false",
            }
            d.update(extra_context_from_filters)
            return d

        return (
            {
                "database": database_name,
                "table": table_name,
                "is_view": is_view,
                "human_description_en": human_description_en,
                "rows": rows[:page_size],
                "truncated": results.truncated,
                "filtered_table_rows_count": filtered_table_rows_count,
                "expanded_columns": expanded_columns,
                "expandable_columns": expandable_columns,
                "columns": columns,
                "primary_keys": pks,
                "units": units,
                "query": {"sql": sql, "params": params},
                "facet_results": facet_results,
                "suggested_facets": suggested_facets,
                "next": next_value and str(next_value) or None,
                "next_url": next_url,
                "private": private,
                "allow_execute_sql": await self.ds.permission_allowed(
                    request.actor, "execute-sql", database_name, default=True
                ),
            },
            extra_template,
            (
                f"table-{to_css_class(database_name)}-{to_css_class(table_name)}.html",
                "table.html",
            ),
        )
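
Example #10 composes its _next= pagination token from an optional sort-value prefix ($null stands in for a null sort value) followed by the row's primary-key path. A small sketch of that composition - make_next_token is an invented helper, while path_from_row_pks and tilde_encode are the datasette.utils functions used above:

from datasette.utils import path_from_row_pks, tilde_encode

def make_next_token(row, pks, use_rowid, sort_column=None):
    # Invented helper mirroring the token layout built in the view above
    token = path_from_row_pks(row, pks, use_rowid)
    if sort_column is not None:
        prefix = row[sort_column]
        prefix = "$null" if prefix is None else tilde_encode(str(prefix))
        token = "{},{}".format(prefix, token)
    return token

# Paginating a table with primary key "id", sorted by "name":
assert make_next_token({"id": 7, "name": "kiwi"}, ["id"], False, "name") == "kiwi,7"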
Example #11
    async def data(self, request, name, hash, table, default_labels=False, _next=None, _size=None):
        canned_query = self.ds.get_canned_query(name, table)
        if canned_query is not None:
            return await self.custom_sql(
                request,
                name,
                hash,
                canned_query["sql"],
                editable=False,
                canned_query=table,
            )

        is_view = bool(await self.ds.get_view_definition(name, table))
        info = self.ds.inspect()
        table_info = info[name]["tables"].get(table) or {}
        if not is_view and not table_info:
            raise NotFound("Table not found: {}".format(table))

        pks = table_info.get("primary_keys") or []
        use_rowid = not pks and not is_view
        if use_rowid:
            select = "rowid, *"
            order_by = "rowid"
            order_by_pks = "rowid"
        else:
            select = "*"
            order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
            order_by = order_by_pks

        if is_view:
            order_by = ""

        # We roll our own query_string decoder because by default Sanic
        # drops anything with an empty value e.g. ?name__exact=
        args = RequestParameters(
            urllib.parse.parse_qs(request.query_string, keep_blank_values=True)
        )

        # Special args start with _ and do not contain a __
        # That's so if there is a column that starts with _
        # it can still be queried using ?_col__exact=blah
        special_args = {}
        special_args_lists = {}
        other_args = {}
        for key, value in args.items():
            if key.startswith("_") and "__" not in key:
                special_args[key] = value[0]
                special_args_lists[key] = value
            else:
                other_args[key] = value[0]

        # Handle ?_filter_column and redirect, if present
        redirect_params = filters_should_redirect(special_args)
        if redirect_params:
            return self.redirect(
                request,
                path_with_added_args(request, redirect_params),
                forward_querystring=False,
            )

        # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
        if "_sort_by_desc" in special_args:
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {
                        "_sort_desc": special_args.get("_sort"),
                        "_sort_by_desc": None,
                        "_sort": None,
                    },
                ),
                forward_querystring=False,
            )

        table_metadata = self.table_metadata(name, table)
        units = table_metadata.get("units", {})
        filters = Filters(sorted(other_args.items()), units, ureg)
        where_clauses, params = filters.build_where_clauses()

        # _search support:
        fts_table = info[name]["tables"].get(table, {}).get("fts_table")
        search_args = dict(
            pair for pair in special_args.items() if pair[0].startswith("_search")
        )
        search_descriptions = []
        search = ""
        if fts_table and search_args:
            if "_search" in search_args:
                # Simple ?_search=xxx
                search = search_args["_search"]
                where_clauses.append(
                    "rowid in (select rowid from {fts_table} where {fts_table} match :search)".format(
                        fts_table=escape_sqlite(fts_table),
                    )
                )
                search_descriptions.append('search matches "{}"'.format(search))
                params["search"] = search
            else:
                # More complex: search against specific columns
                valid_columns = set(info[name]["tables"][fts_table]["columns"])
                for i, (key, search_text) in enumerate(search_args.items()):
                    search_col = key.split("_search_", 1)[1]
                    if search_col not in valid_columns:
                        raise DatasetteError("Cannot search by that column", status=400)

                    where_clauses.append(
                        "rowid in (select rowid from {fts_table} where {search_col} match :search_{i})".format(
                            fts_table=escape_sqlite(fts_table),
                            search_col=escape_sqlite(search_col),
                            i=i
                        )
                    )
                    search_descriptions.append(
                        'search column "{}" matches "{}"'.format(
                            search_col, search_text
                        )
                    )
                    params["search_{}".format(i)] = search_text

        table_rows_count = None
        sortable_columns = set()
        if not is_view:
            table_rows_count = table_info["count"]
            sortable_columns = self.sortable_columns_for_table(name, table, use_rowid)

        # Allow for custom sort order
        sort = special_args.get("_sort")
        if sort:
            if sort not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort))

            order_by = escape_sqlite(sort)
        sort_desc = special_args.get("_sort_desc")
        if sort_desc:
            if sort_desc not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort_desc))

            if sort:
                raise DatasetteError("Cannot use _sort and _sort_desc at the same time")

            order_by = "{} desc".format(escape_sqlite(sort_desc))

        from_sql = "from {table_name} {where}".format(
            table_name=escape_sqlite(table),
            where=(
                "where {} ".format(" and ".join(where_clauses))
            ) if where_clauses else "",
        )
        # Store current params and where_clauses for later:
        from_sql_params = dict(**params)
        from_sql_where_clauses = where_clauses[:]

        count_sql = "select count(*) {}".format(from_sql)

        _next = _next or special_args.get("_next")
        offset = ""
        if _next:
            if is_view:
                # _next is an offset
                offset = " offset {}".format(int(_next))
            else:
                components = urlsafe_components(_next)
                # If a sort order is applied, the first of these is the sort value
                if sort or sort_desc:
                    sort_value = components[0]
                    # Special case for if non-urlencoded first token was $null
                    if _next.split(",")[0] == "$null":
                        sort_value = None
                    components = components[1:]

                # Figure out the SQL for next-based-on-primary-key first
                next_by_pk_clauses = []
                if use_rowid:
                    next_by_pk_clauses.append("rowid > :p{}".format(len(params)))
                    params["p{}".format(len(params))] = components[0]
                else:
                    # Apply the tie-breaker based on primary keys
                    if len(components) == len(pks):
                        param_len = len(params)
                        next_by_pk_clauses.append(
                            compound_keys_after_sql(pks, param_len)
                        )
                        for i, pk_value in enumerate(components):
                            params["p{}".format(param_len + i)] = pk_value

                # Now add the sort SQL, which may incorporate next_by_pk_clauses
                if sort or sort_desc:
                    if sort_value is None:
                        if sort_desc:
                            # Just items where column is null ordered by pk
                            where_clauses.append(
                                "({column} is null and {next_clauses})".format(
                                    column=escape_sqlite(sort_desc),
                                    next_clauses=" and ".join(next_by_pk_clauses),
                                )
                            )
                        else:
                            where_clauses.append(
                                "({column} is not null or ({column} is null and {next_clauses}))".format(
                                    column=escape_sqlite(sort),
                                    next_clauses=" and ".join(next_by_pk_clauses),
                                )
                            )
                    else:
                        where_clauses.append(
                            "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format(
                                column=escape_sqlite(sort or sort_desc),
                                op=">" if sort else "<",
                                p=len(params),
                                extra_desc_only="" if sort else " or {column2} is null".format(
                                    column2=escape_sqlite(sort or sort_desc)
                                ),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            )
                        )
                        params["p{}".format(len(params))] = sort_value
                    order_by = "{}, {}".format(order_by, order_by_pks)
                else:
                    where_clauses.extend(next_by_pk_clauses)

        where_clause = ""
        if where_clauses:
            where_clause = "where {} ".format(" and ".join(where_clauses))

        if order_by:
            order_by = "order by {} ".format(order_by)

        # _group_count=col1&_group_count=col2
        group_count = special_args_lists.get("_group_count") or []
        if group_count:
            sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
                group_cols=", ".join(
                    '"{}"'.format(group_count_col) for group_count_col in group_count
                ),
                table_name=escape_sqlite(table),
                where=where_clause,
            )
            return await self.custom_sql(request, name, hash, sql, editable=True)

        extra_args = {}
        # Handle ?_size=500
        page_size = _size or request.raw_args.get("_size")
        if page_size:
            if page_size == "max":
                page_size = self.max_returned_rows
            try:
                page_size = int(page_size)
                if page_size < 0:
                    raise ValueError

            except ValueError:
                raise DatasetteError("_size must be a positive integer", status=400)

            if page_size > self.max_returned_rows:
                raise DatasetteError(
                    "_size must be <= {}".format(self.max_returned_rows), status=400
                )

            extra_args["page_size"] = page_size
        else:
            page_size = self.page_size

        sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
            select=select,
            table_name=escape_sqlite(table),
            where=where_clause,
            order_by=order_by,
            limit=page_size + 1,
            offset=offset,
        )

        if request.raw_args.get("_timelimit"):
            extra_args["custom_time_limit"] = int(request.raw_args["_timelimit"])

        results = await self.ds.execute(
            name, sql, params, truncate=True, **extra_args
        )

        # facets support
        facet_size = self.ds.config["default_facet_size"]
        metadata_facets = table_metadata.get("facets", [])
        facets = metadata_facets[:]
        if request.args.get("_facet") and not self.ds.config["allow_facet"]:
            raise DatasetteError("_facet= is not allowed", status=400)
        try:
            facets.extend(request.args["_facet"])
        except KeyError:
            pass
        facet_results = {}
        facets_timed_out = []
        for column in facets:
            if _next:
                continue
            facet_sql = """
                select {col} as value, count(*) as count
                {from_sql} {and_or_where} {col} is not null
                group by {col} order by count desc limit {limit}
            """.format(
                col=escape_sqlite(column),
                from_sql=from_sql,
                and_or_where="and" if from_sql_where_clauses else "where",
                limit=facet_size + 1,
            )
            try:
                facet_rows_results = await self.ds.execute(
                    name, facet_sql, params,
                    truncate=False,
                    custom_time_limit=self.ds.config["facet_time_limit_ms"],
                )
                facet_results_values = []
                facet_results[column] = {
                    "name": column,
                    "results": facet_results_values,
                    "truncated": len(facet_rows_results) > facet_size,
                }
                facet_rows = facet_rows_results.rows[:facet_size]
                # Attempt to expand foreign keys into labels
                values = [row["value"] for row in facet_rows]
                expanded = await self.expand_foreign_keys(
                    name, table, column, values
                )
                for row in facet_rows:
                    selected = str(other_args.get(column)) == str(row["value"])
                    if selected:
                        toggle_path = path_with_removed_args(
                            request, {column: str(row["value"])}
                        )
                    else:
                        toggle_path = path_with_added_args(
                            request, {column: row["value"]}
                        )
                    facet_results_values.append({
                        "value": row["value"],
                        "label": expanded.get(
                            (column, row["value"]),
                            row["value"]
                        ),
                        "count": row["count"],
                        "toggle_url": urllib.parse.urljoin(
                            request.url, toggle_path
                        ),
                        "selected": selected,
                    })
            except InterruptedError:
                facets_timed_out.append(column)

        columns = [r[0] for r in results.description]
        rows = list(results.rows)

        filter_columns = columns[:]
        if use_rowid and filter_columns[0] == "rowid":
            filter_columns = filter_columns[1:]

        # Expand labeled columns if requested
        expanded_columns = []
        expandable_columns = self.expandable_columns(name, table)
        columns_to_expand = None
        try:
            all_labels = value_as_boolean(special_args.get("_labels", ""))
        except ValueError:
            all_labels = default_labels
        # Check for explicit _label=
        if "_label" in request.args:
            columns_to_expand = request.args["_label"]
        if columns_to_expand is None and all_labels:
            # expand all columns with foreign keys
            columns_to_expand = [
                fk["column"] for fk, _ in expandable_columns
            ]

        if columns_to_expand:
            expanded_labels = {}
            for fk, label_column in expandable_columns:
                column = fk["column"]
                if column not in columns_to_expand:
                    continue
                expanded_columns.append(column)
                # Gather the values
                column_index = columns.index(column)
                values = [row[column_index] for row in rows]
                # Expand them
                expanded_labels.update(await self.expand_foreign_keys(
                    name, table, column, values
                ))
            if expanded_labels:
                # Rewrite the rows
                new_rows = []
                for row in rows:
                    new_row = CustomRow(columns)
                    for column in row.keys():
                        value = row[column]
                        if (column, value) in expanded_labels:
                            new_row[column] = {
                                "value": value,
                                "label": expanded_labels[(column, value)],
                            }
                        else:
                            new_row[column] = value
                    new_rows.append(new_row)
                rows = new_rows

        # Pagination next link
        next_value = None
        next_url = None
        if len(rows) > page_size and page_size > 0:
            if is_view:
                next_value = int(_next or 0) + page_size
            else:
                next_value = path_from_row_pks(rows[-2], pks, use_rowid)
            # If there's a sort or sort_desc, add that value as a prefix
            if (sort or sort_desc) and not is_view:
                prefix = rows[-2][sort or sort_desc]
                if prefix is None:
                    prefix = "$null"
                else:
                    prefix = urllib.parse.quote_plus(str(prefix))
                next_value = "{},{}".format(prefix, next_value)
                added_args = {"_next": next_value}
                if sort:
                    added_args["_sort"] = sort
                else:
                    added_args["_sort_desc"] = sort_desc
            else:
                added_args = {"_next": next_value}
            next_url = urllib.parse.urljoin(
                request.url, path_with_replaced_args(request, added_args)
            )
            rows = rows[:page_size]

        # Number of filtered rows in whole set:
        filtered_table_rows_count = None
        suggested_facets = []
        if count_sql:
            try:
                count_rows = list(await self.ds.execute(
                    name, count_sql, from_sql_params
                ))
                filtered_table_rows_count = count_rows[0][0]
            except InterruptedError:
                pass

            # Detect suggested facets
            if self.ds.config["suggest_facets"] and self.ds.config["allow_facet"]:
                for facet_column in columns:
                    if facet_column in facets:
                        continue
                    if _next:
                        continue
                    suggested_facet_sql = """
                        select distinct {column} {from_sql}
                        {and_or_where} {column} is not null
                        limit {limit}
                    """.format(
                        column=escape_sqlite(facet_column),
                        from_sql=from_sql,
                        and_or_where="and" if from_sql_where_clauses else "where",
                        limit=facet_size + 1,
                    )
                    distinct_values = None
                    try:
                        distinct_values = await self.ds.execute(
                            name, suggested_facet_sql, from_sql_params,
                            truncate=False,
                            custom_time_limit=self.ds.config["facet_suggest_time_limit_ms"],
                        )
                        num_distinct_values = len(distinct_values)
                        if (
                            num_distinct_values and
                            num_distinct_values > 1 and
                            num_distinct_values <= facet_size and
                            # Skip the suggestion if the count query was interrupted
                            filtered_table_rows_count is not None and
                            num_distinct_values < filtered_table_rows_count
                        ):
                            suggested_facets.append({
                                "name": facet_column,
                                "toggle_url": path_with_added_args(
                                    request, {"_facet": facet_column}
                                ),
                            })
                    except InterruptedError:
                        pass

        # human_description_en combines filters AND search, if provided
        human_description_en = filters.human_description_en(extra=search_descriptions)

        if sort or sort_desc:
            sorted_by = "sorted by {}{}".format(
                (sort or sort_desc), " descending" if sort_desc else ""
            )
            human_description_en = " ".join(
                [b for b in [human_description_en, sorted_by] if b]
            )

        async def extra_template():
            display_columns, display_rows = await self.display_columns_and_rows(
                name,
                table,
                results.description,
                rows,
                link_column=not is_view,
            )
            metadata = (
                self.ds.metadata.get("databases", {})
                .get(name, {})
                .get("tables", {})
                .get(table, {})
            )
            self.ds.update_with_inherited_metadata(metadata)
            return {
                "database_hash": hash,
                "supports_search": bool(fts_table),
                "search": search or "",
                "use_rowid": use_rowid,
                "filters": filters,
                "display_columns": display_columns,
                "filter_columns": filter_columns,
                "display_rows": display_rows,
                "facets_timed_out": facets_timed_out,
                "sorted_facet_results": sorted(
                    facet_results.values(),
                    key=lambda f: (len(f["results"]), f["name"]),
                    reverse=True
                ),
                "facet_hideable": lambda facet: facet not in metadata_facets,
                "is_sortable": any(c["sortable"] for c in display_columns),
                "path_with_replaced_args": path_with_replaced_args,
                "path_with_removed_args": path_with_removed_args,
                "append_querystring": append_querystring,
                "request": request,
                "sort": sort,
                "sort_desc": sort_desc,
                "disable_sort": is_view,
                "custom_rows_and_columns_templates": [
                    "_rows_and_columns-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns-table-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns.html",
                ],
                "metadata": metadata,
                "view_definition": await self.ds.get_view_definition(name, table),
                "table_definition": await self.ds.get_table_definition(name, table),
            }

        return {
            "database": name,
            "table": table,
            "is_view": is_view,
            "human_description_en": human_description_en,
            "rows": rows[:page_size],
            "truncated": results.truncated,
            "table_rows_count": table_rows_count,
            "filtered_table_rows_count": filtered_table_rows_count,
            "expanded_columns": expanded_columns,
            "expandable_columns": expandable_columns,
            "columns": columns,
            "primary_keys": pks,
            "units": units,
            "query": {"sql": sql, "params": params},
            "facet_results": facet_results,
            "suggested_facets": suggested_facets,
            "next": next_value and str(next_value) or None,
            "next_url": next_url,
        }, extra_template, (
            "table-{}-{}.html".format(to_css_class(name), to_css_class(table)),
            "table.html",
        )
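
A note on the toggle_url values built above: path_with_added_args is a Datasette utility that merges extra query-string arguments into the current request's path. Its implementation is not shown in these examples, so the stand-in below is only a hypothetical sketch of the behaviour the call sites appear to assume (it takes a path string rather than a request object, and the None-means-remove rule is inferred from how later examples call it):

import urllib.parse

def add_args_to_path(path, extra_args):
    # Hypothetical stand-in for path_with_added_args: keep the existing
    # query-string pairs (unless their key is being set to None, which we
    # treat as removal) and append the new non-None pairs.
    base, _, query = path.partition("?")
    pairs = [
        (key, value)
        for key, value in urllib.parse.parse_qsl(query, keep_blank_values=True)
        if extra_args.get(key, "") is not None
    ]
    pairs += [(key, value) for key, value in extra_args.items() if value is not None]
    return "{}?{}".format(base, urllib.parse.urlencode(pairs))

# e.g. add_args_to_path("/db/table?_search=dog", {"_facet": "city"})
# returns "/db/table?_search=dog&_facet=city"
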
Exemple #13
0
    async def display_columns_and_rows(
        self,
        database,
        table,
        description,
        rows,
        link_column=False,
    ):
        "Returns columns, rows for specified table - including fancy foreign key treatment"
        table_metadata = self.table_metadata(database, table)
        info = self.ds.inspect()[database]
        sortable_columns = self.sortable_columns_for_table(database, table, True)
        columns = [
            {"name": r[0], "sortable": r[0] in sortable_columns} for r in description
        ]
        tables = info["tables"]
        table_info = tables.get(table) or {}
        pks = table_info.get("primary_keys") or []
        column_to_foreign_key_table = {
            fk["column"]: fk["other_table"]
            for fk in table_info.get("foreign_keys", {}).get("outgoing") or []
        }

        cell_rows = []
        for row in rows:
            cells = []
            # Unless we are a view, the first column is a link - either to the rowid
            # or to the simple or compound primary key
            if link_column:
                cells.append(
                    {
                        "column": pks[0] if len(pks) == 1 else "Link",
                        "value": jinja2.Markup(
                            '<a href="/{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
                                database=database,
                                table=urllib.parse.quote_plus(table),
                                flat_pks=str(
                                    jinja2.escape(
                                        path_from_row_pks(row, pks, not pks, False)
                                    )
                                ),
                                flat_pks_quoted=path_from_row_pks(row, pks, not pks),
                            )
                        ),
                    }
                )

            for value, column_dict in zip(row, columns):
                column = column_dict["name"]
                if link_column and len(pks) == 1 and column == pks[0]:
                    # If there's a simple primary key, don't repeat the value as it's
                    # already shown in the link column.
                    continue

                if isinstance(value, dict):
                    # It's an expanded foreign key - display link to other row
                    label = value["label"]
                    value = value["value"]
                    # The table we link to depends on the column
                    other_table = column_to_foreign_key_table[column]
                    link_template = (
                        LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE
                    )
                    display_value = jinja2.Markup(link_template.format(
                        database=database,
                        table=urllib.parse.quote_plus(other_table),
                        link_id=urllib.parse.quote_plus(str(value)),
                        id=str(jinja2.escape(value)),
                        label=str(jinja2.escape(label)),
                    ))
                elif value in ("", None):
                    display_value = jinja2.Markup("&nbsp;")
                elif is_url(str(value).strip()):
                    display_value = jinja2.Markup(
                        '<a href="{url}">{url}</a>'.format(
                            url=jinja2.escape(value.strip())
                        )
                    )
                elif column in table_metadata.get("units", {}) and value != "":
                    # Interpret units using pint
                    value = value * ureg(table_metadata["units"][column])
                    # Pint uses floating point which sometimes introduces errors in the compact
                    # representation, which we have to round off to avoid ugliness. In the vast
                    # majority of cases this rounding will be inconsequential. I hope.
                    value = round(value.to_compact(), 6)
                    display_value = jinja2.Markup(
                        "{:~P}".format(value).replace(" ", "&nbsp;")
                    )
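                    # e.g. (hypothetical) a raw value of 120000 in a column
                    # with unit "Hz" compacts to 120 kHz, rendered as
                    # something like "120.0&nbsp;kHz" by the "{:~P}" pretty
                    # format above.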
                else:
                    display_value = str(value)

                cells.append({"column": column, "value": display_value})
            cell_rows.append(cells)

        if link_column:
            # Add the link column header.
            # If it's a simple primary key, we have to remove and re-add that column name at
            # the beginning of the header row.
            if len(pks) == 1:
                columns = [col for col in columns if col["name"] != pks[0]]

            columns = [
                {"name": pks[0] if len(pks) == 1 else "Link", "sortable": len(pks) == 1}
            ] + columns
        return columns, cell_rows
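
The display_columns_and_rows implementation above builds the link column with path_from_row_pks (quoted for the URL path, unquoted for the visible text). The utility itself is not included in these examples; the following is a hypothetical reconstruction consistent with how it is called here:

import urllib.parse

def path_from_row_pks(row, pks, use_rowid, quote=True):
    # Hypothetical reconstruction: join the primary-key values (or the rowid)
    # with commas, URL-escaping each component unless quote is False.
    # Assumes row supports lookup by column name, like sqlite3.Row or a dict.
    if use_rowid:
        pks = ["rowid"]
    values = [str(row[pk]) for pk in pks]
    if quote:
        values = [urllib.parse.quote_plus(value) for value in values]
    return ",".join(values)

# e.g. path_from_row_pks({"id": 5}, ["id"], False) returns "5"
# and path_from_row_pks({"a": 1, "b": "x y"}, ["a", "b"], False) returns "1,x+y"
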
Exemple #14
0
    async def display_columns_and_rows(
        self,
        database,
        table,
        description,
        rows,
        link_column=False,
        expand_foreign_keys=True,
    ):
        "Returns columns, rows for specified table - including fancy foreign key treatment"
        table_metadata = self.table_metadata(database, table)
        info = self.ds.inspect()[database]
        sortable_columns = self.sortable_columns_for_table(
            database, table, True)
        columns = [{
            "name": r[0],
            "sortable": r[0] in sortable_columns
        } for r in description]
        tables = info["tables"]
        table_info = tables.get(table) or {}
        pks = table_info.get("primary_keys") or []

        # Prefetch foreign key resolutions for later expansion:
        fks = {}
        labeled_fks = {}
        if table_info and expand_foreign_keys:
            foreign_keys = table_info["foreign_keys"]["outgoing"]
            for fk in foreign_keys:
                label_column = (
                    # First look in metadata.json definition for this foreign key table:
                    self.table_metadata(database,
                                        fk["other_table"]).get("label_column")
                    # Fall back to label_column from .inspect() detection:
                    or tables.get(fk["other_table"], {}).get("label_column"))
                if not label_column:
                    # No label for this FK
                    fks[fk["column"]] = fk["other_table"]
                    continue

                ids_to_lookup = {row[fk["column"]] for row in rows}
                sql = '''
                    select {other_column}, {label_column}
                    from {other_table}
                    where {other_column} in ({placeholders})
                '''.format(
                    other_column=escape_sqlite(fk["other_column"]),
                    label_column=escape_sqlite(label_column),
                    other_table=escape_sqlite(fk["other_table"]),
                    placeholders=", ".join(["?"] * len(ids_to_lookup)),
                )
                try:
                    results = await self.execute(database, sql, list(ids_to_lookup))
                except InterruptedError:
                    pass
                else:
                    for id, value in results:
                        labeled_fks[(fk["column"], id)] = (fk["other_table"],
                                                           value)

        cell_rows = []
        for row in rows:
            cells = []
            # Unless we are a view, the first column is a link - either to the rowid
            # or to the simple or compound primary key
            if link_column:
                cells.append(
                    {
                        "column": pks[0] if len(pks) == 1 else "Link",
                        "value": jinja2.Markup(
                            '<a href="/{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
                                database=database,
                                table=urllib.parse.quote_plus(table),
                                flat_pks=str(
                                    jinja2.escape(
                                        path_from_row_pks(row, pks, not pks, False)
                                    )
                                ),
                                flat_pks_quoted=path_from_row_pks(row, pks, not pks),
                            )
                        ),
                    }
                )

            for value, column_dict in zip(row, columns):
                column = column_dict["name"]
                if link_column and len(pks) == 1 and column == pks[0]:
                    # If there's a simple primary key, don't repeat the value as it's
                    # already shown in the link column.
                    continue

                if (column, value) in labeled_fks:
                    other_table, label = labeled_fks[(column, value)]
                    display_value = jinja2.Markup(
                        '<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format(
                            database=database,
                            table=urllib.parse.quote_plus(other_table),
                            link_id=urllib.parse.quote_plus(str(value)),
                            id=str(jinja2.escape(value)),
                            label=str(jinja2.escape(label)),
                        )
                    )
                elif column in fks:
                    display_value = jinja2.Markup(
                        '<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
                            database=database,
                            table=urllib.parse.quote_plus(fks[column]),
                            link_id=urllib.parse.quote_plus(str(value)),
                            id=str(jinja2.escape(value)),
                        )
                    )
                elif value is None:
                    display_value = jinja2.Markup("&nbsp;")
                elif is_url(str(value).strip()):
                    display_value = jinja2.Markup(
                        '<a href="{url}">{url}</a>'.format(
                            url=jinja2.escape(value.strip())))
                elif column in table_metadata.get("units", {}) and value != "":
                    # Interpret units using pint
                    value = value * ureg(table_metadata["units"][column])
                    # Pint uses floating point which sometimes introduces errors in the compact
                    # representation, which we have to round off to avoid ugliness. In the vast
                    # majority of cases this rounding will be inconsequential. I hope.
                    value = round(value.to_compact(), 6)
                    display_value = jinja2.Markup(
                        "{:~P}".format(value).replace(" ", "&nbsp;"))
                else:
                    display_value = str(value)

                cells.append({"column": column, "value": display_value})
            cell_rows.append(cells)

        if link_column:
            # Add the link column header.
            # If it's a simple primary key, we have to remove and re-add that column name at
            # the beginning of the header row.
            if len(pks) == 1:
                columns = [col for col in columns if col["name"] != pks[0]]

            columns = [{
                "name": pks[0] if len(pks) == 1 else "Link",
                "sortable": len(pks) == 1
            }] + columns
        return columns, cell_rows
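
The label_column lookup in the foreign-key prefetch above consults table metadata before falling back to .inspect() detection. For reference, a minimal metadata structure that would satisfy the first branch; the database and table names here are invented:

metadata = {
    "databases": {
        "mydb": {
            "tables": {
                "other_table": {
                    # Hypothetical example: used as the human-readable label
                    # when foreign keys pointing at other_table are expanded.
                    "label_column": "name",
                },
            },
        },
    },
}
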
Exemple #15
0
    async def data(self, request, name, hash, table):
        table = urllib.parse.unquote_plus(table)
        canned_query = self.ds.get_canned_query(name, table)
        if canned_query is not None:
            return await self.custom_sql(
                request,
                name,
                hash,
                canned_query["sql"],
                editable=False,
                canned_query=table,
            )

        is_view = bool(
            list(await self.execute(
                name,
                "SELECT count(*) from sqlite_master WHERE type = 'view' and name=:n",
                {"n": table},
            ))[0][0])
        view_definition = None
        table_definition = None
        if is_view:
            view_definition = list(await self.execute(
                name,
                "select sql from sqlite_master where name = :n and type = 'view'",
                {"n": table},
            ))[0][0]
        else:
            table_definition_rows = list(await self.execute(
                name,
                "select sql from sqlite_master where name = :n and type = 'table'",
                {"n": table},
            ))
            if not table_definition_rows:
                raise NotFound("Table not found: {}".format(table))

            table_definition = table_definition_rows[0][0]
        info = self.ds.inspect()
        table_info = info[name]["tables"].get(table) or {}
        pks = table_info.get("primary_keys") or []
        use_rowid = not pks and not is_view
        if use_rowid:
            select = "rowid, *"
            order_by = "rowid"
            order_by_pks = "rowid"
        else:
            select = "*"
            order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
            order_by = order_by_pks

        if is_view:
            order_by = ""

        # We roll our own query_string decoder because by default Sanic
        # drops anything with an empty value e.g. ?name__exact=
        args = RequestParameters(
            urllib.parse.parse_qs(request.query_string,
                                  keep_blank_values=True))

        # Special args start with _ and do not contain a __
        # That's so if there is a column that starts with _
        # it can still be queried using ?_col__exact=blah
        special_args = {}
        special_args_lists = {}
        other_args = {}
        for key, value in args.items():
            if key.startswith("_") and "__" not in key:
                special_args[key] = value[0]
                special_args_lists[key] = value
            else:
                other_args[key] = value[0]
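        # Worked example: ?_search=dog&_facet=a&_facet=b&name__exact= gives
        #   special_args == {"_search": "dog", "_facet": "a"}
        #   special_args_lists == {"_search": ["dog"], "_facet": ["a", "b"]}
        #   other_args == {"name__exact": ""}
        # name__exact contains "__" so it is treated as a column filter, even
        # if the column name itself starts with "_".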

        # Handle ?_filter_column and redirect, if present
        redirect_params = filters_should_redirect(special_args)
        if redirect_params:
            return self.redirect(
                request,
                path_with_added_args(request, redirect_params),
                forward_querystring=False,
            )

        # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
        if "_sort_by_desc" in special_args:
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {
                        "_sort_desc": special_args.get("_sort"),
                        "_sort_by_desc": None,
                        "_sort": None,
                    },
                ),
                forward_querystring=False,
            )

        table_metadata = self.table_metadata(name, table)
        units = table_metadata.get("units", {})
        filters = Filters(sorted(other_args.items()), units, ureg)
        where_clauses, params = filters.build_where_clauses()

        # _search support:
        fts_table = info[name]["tables"].get(table, {}).get("fts_table")
        search_args = dict(pair for pair in special_args.items()
                           if pair[0].startswith("_search"))
        search_descriptions = []
        search = ""
        if fts_table and search_args:
            if "_search" in search_args:
                # Simple ?_search=xxx
                search = search_args["_search"]
                where_clauses.append(
                    "rowid in (select rowid from [{fts_table}] where [{fts_table}] match :search)"
                    .format(fts_table=fts_table))
                search_descriptions.append(
                    'search matches "{}"'.format(search))
                params["search"] = search
            else:
                # More complex: search against specific columns
                valid_columns = set(info[name]["tables"][fts_table]["columns"])
                for i, (key, search_text) in enumerate(search_args.items()):
                    search_col = key.split("_search_", 1)[1]
                    if search_col not in valid_columns:
                        raise DatasetteError("Cannot search by that column",
                                             status=400)

                    where_clauses.append(
                        "rowid in (select rowid from [{fts_table}] where [{search_col}] match :search_{i})"
                        .format(fts_table=fts_table,
                                search_col=search_col,
                                i=i))
                    search_descriptions.append(
                        'search column "{}" matches "{}"'.format(
                            search_col, search_text))
                    params["search_{}".format(i)] = search_text

        table_rows_count = None
        sortable_columns = set()
        if not is_view:
            table_rows_count = table_info["count"]
            sortable_columns = self.sortable_columns_for_table(
                name, table, use_rowid)

        # Allow for custom sort order
        sort = special_args.get("_sort")
        if sort:
            if sort not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort))

            order_by = escape_sqlite(sort)
        sort_desc = special_args.get("_sort_desc")
        if sort_desc:
            if sort_desc not in sortable_columns:
                raise DatasetteError(
                    "Cannot sort table by {}".format(sort_desc))

            if sort:
                raise DatasetteError(
                    "Cannot use _sort and _sort_desc at the same time")

            order_by = "{} desc".format(escape_sqlite(sort_desc))

        from_sql = "from {table_name} {where}".format(
            table_name=escape_sqlite(table),
            where=("where {} ".format(" and ".join(where_clauses)))
            if where_clauses else "",
        )
        count_sql = "select count(*) {}".format(from_sql)

        _next = special_args.get("_next")
        offset = ""
        if _next:
            if is_view:
                # _next is an offset
                offset = " offset {}".format(int(_next))
            else:
                components = urlsafe_components(_next)
                # If a sort order is applied, the first of these is the sort value
                if sort or sort_desc:
                    sort_value = components[0]
                    # Special case for if non-urlencoded first token was $null
                    if _next.split(",")[0] == "$null":
                        sort_value = None
                    components = components[1:]

                # Figure out the SQL for next-based-on-primary-key first
                next_by_pk_clauses = []
                if use_rowid:
                    next_by_pk_clauses.append("rowid > :p{}".format(
                        len(params)))
                    params["p{}".format(len(params))] = components[0]
                else:
                    # Apply the tie-breaker based on primary keys
                    if len(components) == len(pks):
                        param_len = len(params)
                        next_by_pk_clauses.append(
                            compound_keys_after_sql(pks, param_len))
                        for i, pk_value in enumerate(components):
                            params["p{}".format(param_len + i)] = pk_value

                # Now add the sort SQL, which may incorporate next_by_pk_clauses
                if sort or sort_desc:
                    if sort_value is None:
                        if sort_desc:
                            # Just items where column is null ordered by pk
                            where_clauses.append(
                                "({column} is null and {next_clauses})".format(
                                    column=escape_sqlite(sort_desc),
                                    next_clauses=" and ".join(
                                        next_by_pk_clauses),
                                ))
                        else:
                            where_clauses.append(
                                "({column} is not null or ({column} is null and {next_clauses}))"
                                .format(
                                    column=escape_sqlite(sort),
                                    next_clauses=" and ".join(
                                        next_by_pk_clauses),
                                ))
                    else:
                        where_clauses.append(
                            "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))"
                            .format(
                                column=escape_sqlite(sort or sort_desc),
                                op=">" if sort else "<",
                                p=len(params),
                                extra_desc_only=""
                                if sort else " or {column2} is null".format(
                                    column2=escape_sqlite(sort or sort_desc)),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            ))
                        params["p{}".format(len(params))] = sort_value
                    order_by = "{}, {}".format(order_by, order_by_pks)
                else:
                    where_clauses.extend(next_by_pk_clauses)
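        # Illustrative walk-through (hypothetical values): with pks == ["id"],
        # ?_sort=name&_next=Alice,42 splits into components ("Alice", "42");
        # sort_value becomes "Alice" and the remaining "42" feeds the
        # primary-key tie-breaker, producing a clause shaped like
        #   (name > :pN or (name = :pN and id > :pM))
        # with the order by extended to "name, id".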

        where_clause = ""
        if where_clauses:
            where_clause = "where {} ".format(" and ".join(where_clauses))

        if order_by:
            order_by = "order by {} ".format(order_by)

        # _group_count=col1&_group_count=col2
        group_count = special_args_lists.get("_group_count") or []
        if group_count:
            sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
                group_cols=", ".join('"{}"'.format(group_count_col)
                                     for group_count_col in group_count),
                table_name=escape_sqlite(table),
                where=where_clause,
            )
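            # e.g. ?_group_count=city&_group_count=state expands (roughly) to:
            #   select "city", "state", count(*) as "count"
            #   from [mytable] where ... group by "city", "state"
            #   order by "count" desc limit 100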
            return await self.custom_sql(request,
                                         name,
                                         hash,
                                         sql,
                                         editable=True)

        extra_args = {}
        # Handle ?_page_size=500
        page_size = request.raw_args.get("_size")
        if page_size:
            if page_size == "max":
                page_size = self.max_returned_rows
            try:
                page_size = int(page_size)
                if page_size < 0:
                    raise ValueError

            except ValueError:
                raise DatasetteError("_size must be a positive integer",
                                     status=400)

            if page_size > self.max_returned_rows:
                raise DatasetteError("_size must be <= {}".format(
                    self.max_returned_rows),
                                     status=400)

            extra_args["page_size"] = page_size
        else:
            page_size = self.page_size

        sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
            select=select,
            table_name=escape_sqlite(table),
            where=where_clause,
            order_by=order_by,
            limit=page_size + 1,
            offset=offset,
        )

        if request.raw_args.get("_timelimit"):
            extra_args["custom_time_limit"] = int(
                request.raw_args["_timelimit"])

        rows, truncated, description = await self.execute(name,
                                                          sql,
                                                          params,
                                                          truncate=True,
                                                          **extra_args)

        # facets support
        try:
            facets = request.args["_facet"]
        except KeyError:
            facets = table_metadata.get("facets", [])
        facet_results = {}
        for column in facets:
            facet_sql = """
                select {col} as value, count(*) as count
                {from_sql}
                group by {col} order by count desc limit 20
            """.format(col=escape_sqlite(column), from_sql=from_sql)
            try:
                facet_rows = await self.execute(name,
                                                facet_sql,
                                                params,
                                                truncate=False,
                                                custom_time_limit=200)
                facet_results[column] = [
                    {
                        "value": row["value"],
                        "count": row["count"],
                        "toggle_url": urllib.parse.urljoin(
                            request.url,
                            path_with_added_args(request, {column: row["value"]}),
                        ),
                    }
                    for row in facet_rows
                ]
            except sqlite3.OperationalError:
                # Hit time limit
                pass

        columns = [r[0] for r in description]
        rows = list(rows)

        filter_columns = columns[:]
        if use_rowid and filter_columns[0] == "rowid":
            filter_columns = filter_columns[1:]

        # Pagination next link
        next_value = None
        next_url = None
        if len(rows) > page_size and page_size > 0:
            if is_view:
                next_value = int(_next or 0) + page_size
            else:
                next_value = path_from_row_pks(rows[-2], pks, use_rowid)
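                # rows was fetched with "limit page_size + 1", so rows[-2] is
                # the last row that will actually be displayed; its primary
                # keys become the cursor for the next page.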
            # If there's a sort or sort_desc, add that value as a prefix
            if (sort or sort_desc) and not is_view:
                prefix = rows[-2][sort or sort_desc]
                if prefix is None:
                    prefix = "$null"
                else:
                    prefix = urllib.parse.quote_plus(str(prefix))
                next_value = "{},{}".format(prefix, next_value)
                added_args = {"_next": next_value}
                if sort:
                    added_args["_sort"] = sort
                else:
                    added_args["_sort_desc"] = sort_desc
            else:
                added_args = {"_next": next_value}
            next_url = urllib.parse.urljoin(
                request.url, path_with_added_args(request, added_args))
            rows = rows[:page_size]

        # Number of filtered rows in whole set:
        filtered_table_rows_count = None
        if count_sql:
            try:
                count_rows = list(await self.execute(name, count_sql, params))
                filtered_table_rows_count = count_rows[0][0]
            except sqlite3.OperationalError:
                # Almost certainly hit the timeout
                pass

        # human_description_en combines filters AND search, if provided
        human_description_en = filters.human_description_en(
            extra=search_descriptions)

        if sort or sort_desc:
            sorted_by = "sorted by {}{}".format(
                (sort or sort_desc), " descending" if sort_desc else "")
            human_description_en = " ".join(
                [b for b in [human_description_en, sorted_by] if b])

        async def extra_template():
            display_columns, display_rows = await self.display_columns_and_rows(
                name,
                table,
                description,
                rows,
                link_column=not is_view,
                expand_foreign_keys=True,
            )
            metadata = (
                self.ds.metadata.get("databases", {})
                .get(name, {})
                .get("tables", {})
                .get(table, {})
            )
            self.ds.update_with_inherited_metadata(metadata)
            return {
                "database_hash": hash,
                "supports_search": bool(fts_table),
                "search": search or "",
                "use_rowid": use_rowid,
                "filters": filters,
                "display_columns": display_columns,
                "filter_columns": filter_columns,
                "display_rows": display_rows,
                "is_sortable": any(c["sortable"] for c in display_columns),
                "path_with_added_args": path_with_added_args,
                "request": request,
                "sort": sort,
                "sort_desc": sort_desc,
                "disable_sort": is_view,
                "custom_rows_and_columns_templates": [
                    "_rows_and_columns-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns-table-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns.html",
                ],
                "metadata": metadata,
            }

        return {
            "database": name,
            "table": table,
            "is_view": is_view,
            "view_definition": view_definition,
            "table_definition": table_definition,
            "human_description_en": human_description_en,
            "rows": rows[:page_size],
            "truncated": truncated,
            "table_rows_count": table_rows_count,
            "filtered_table_rows_count": filtered_table_rows_count,
            "columns": columns,
            "primary_keys": pks,
            "units": units,
            "query": {
                "sql": sql,
                "params": params
            },
            "facet_results": facet_results,
            "next": next_value and str(next_value) or None,
            "next_url": next_url,
        }, extra_template, (
            "table-{}-{}.html".format(to_css_class(name), to_css_class(table)),
            "table.html",
        )
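
compound_keys_after_sql, used by the keyset pagination above, is not shown in any of these examples. The sketch below is a hypothetical reconstruction consistent with its call site: it emits the comparison needed to resume a scan strictly after a given compound primary key (identifier escaping is omitted here; the real utility presumably applies escape_sqlite):

def compound_keys_after_sql(pks, start_index=0):
    # Hypothetical reconstruction: build nested or-clauses so that rows
    # compare strictly greater than the cursor in (pk1, pk2, ...) order.
    or_clauses = []
    for i, pk in enumerate(pks):
        and_parts = [
            "{} = :p{}".format(prev, start_index + j)
            for j, prev in enumerate(pks[:i])
        ]
        and_parts.append("{} > :p{}".format(pk, start_index + i))
        or_clauses.append("({})".format(" and ".join(and_parts)))
    return "({})".format(" or ".join(or_clauses))

# e.g. compound_keys_after_sql(["a", "b"]) returns
# "((a > :p0) or (a = :p0 and b > :p1))"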