def test_path_with_added_args(path, added_args, expected):
    """Check ``utils.path_with_added_args`` against an expected result.

    Builds a ``Request`` for the UTF-8 encoded ``path`` and asserts that
    adding ``added_args`` to it produces ``expected``.
    """
    encoded_path = path.encode("utf8")
    # Positional arguments are passed through exactly as the Request
    # constructor receives them: path bytes, an empty dict, '1.1', 'GET', None.
    get_request = Request(encoded_path, {}, "1.1", "GET", None)
    assert expected == utils.path_with_added_args(get_request, added_args)
async def suggest(self):
    """Suggest array facets for columns that appear to hold JSON arrays of strings.

    Every column returned by ``self.get_columns`` that is not already listed
    in the configured facets is probed in two steps:

    1. The distinct ``json_type`` values of its non-null, non-empty entries
       must be exactly ``("array",)`` or ``("array", None)``.
    2. Up to 100 non-empty arrays are fetched and each one must satisfy
       ``self._is_json_array_of_strings``.

    A column whose probe raises ``QueryInterrupted`` or
    ``sqlite3.OperationalError`` is skipped.

    Returns:
        A list of dicts with the keys "name", "type" (always "array") and
        "toggle_url".
    """
    candidate_columns = await self.get_columns(self.sql, self.params)
    suggestions = []
    enabled = [cfg["config"]["simple"] for cfg in self.get_configs()]
    for column in candidate_columns:
        if column in enabled:
            continue
        # Is every value in this column either null or a JSON array?
        type_probe_sql = """ select distinct json_type({column}) from ({sql}) where {column} is not null and {column} != '' """.format(
            column=escape_sqlite(column), sql=self.sql
        )
        try:
            type_rows = await self.ds.execute(
                self.database,
                type_probe_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"),
                log_sql_errors=False,
            )
            json_types = tuple(row[0] for row in type_rows.rows)
            if json_types not in (("array",), ("array", None)):
                continue
            # Now sanity check that first 100 arrays contain only strings
            sample_rows = await self.ds.execute(
                self.database,
                (
                    "select {column} from ({sql}) "
                    "where {column} is not null "
                    "and {column} != '' "
                    "and json_array_length({column}) > 0 "
                    "limit 100"
                ).format(column=escape_sqlite(column), sql=self.sql),
                self.params,
                truncate=False,
                custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"),
                log_sql_errors=False,
            )
            sample = [row[0] for row in sample_rows]
            if not sample:
                continue
            if not all(self._is_json_array_of_strings(item) for item in sample):
                continue
            toggle_path = path_with_added_args(self.request, {"_facet_array": column})
            suggestions.append(
                {
                    "name": column,
                    "type": "array",
                    "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                }
            )
        except (QueryInterrupted, sqlite3.OperationalError):
            continue
    return suggestions
async def suggest(self):
    """Suggest a "dummy" facet for every column when ``?_dummy_facet`` is set.

    The column list is always fetched; suggestions are only produced when the
    request's ``_dummy_facet`` argument is truthy.

    Returns:
        A list of dicts with the keys "name", "toggle_url" and "type"
        (always "dummy"), or an empty list when the argument is absent.
    """
    columns = await self.get_columns(self.sql, self.params)
    if not self.request.args.get("_dummy_facet"):
        return []
    suggestions = []
    for column in columns:
        toggle_path = path_with_added_args(self.request, {"_facet_dummy": column})
        suggestions.append(
            {
                "name": column,
                "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                "type": "dummy",
            }
        )
    return suggestions
async def suggest(self):
    """Suggest plain column facets based on the number of distinct values.

    For each column not already configured as a facet, the distinct non-null
    values are counted (fetching at most ``default_facet_size + 1`` groups).
    The column is suggested when it has more than one distinct value, no more
    than ``default_facet_size`` of them, fewer distinct values than rows, and
    at least one value that occurs more than once. Columns whose probe raises
    ``QueryInterrupted`` are skipped.

    Returns:
        A list of dicts with the keys "name" and "toggle_url".
    """
    row_count = await self.get_row_count()
    columns = await self.get_columns(self.sql, self.params)
    facet_size = self.ds.config("default_facet_size")
    suggestions = []
    enabled = [cfg["config"]["simple"] for cfg in self.get_configs()]
    for column in columns:
        if column in enabled:
            continue
        probe_sql = """ select {column}, count(*) as n from ( {sql} ) where {column} is not null group by {column} limit {limit} """.format(
            column=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
        )
        try:
            grouped = await self.ds.execute(
                self.database,
                probe_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
            )
            distinct_count = len(grouped)
            # Needs at least two values, but not more than fit in one facet.
            if not (1 < distinct_count <= facet_size):
                continue
            # A column where every row is unique is not a useful facet.
            if not distinct_count < row_count:
                continue
            # And at least one has n > 1
            if not any(group["n"] > 1 for group in grouped):
                continue
            toggle_path = path_with_added_args(self.request, {"_facet": column})
            suggestions.append(
                {
                    "name": column,
                    "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                }
            )
        except QueryInterrupted:
            continue
    return suggestions
async def suggest(self):
    """Suggest date facets for columns whose values look like dates.

    For each column not already configured as a facet, up to 100 values
    matching the glob ``????-??-*`` are passed through SQLite's ``date()``.
    The column is suggested if any of the results is truthy. Columns whose
    probe raises ``QueryInterrupted`` or ``sqlite3.OperationalError`` are
    skipped.

    Returns:
        A list of dicts with the keys "name", "type" (always "date") and
        "toggle_url".
    """
    columns = await self.get_columns(self.sql, self.params)
    enabled = [cfg["config"]["simple"] for cfg in self.get_configs()]
    suggestions = []
    for column in columns:
        if column in enabled:
            continue
        # Does this column contain any dates in the first 100 rows?
        probe_sql = """ select date({column}) from ( {sql} ) where {column} glob "????-??-*" limit 100; """.format(
            column=escape_sqlite(column), sql=self.sql
        )
        try:
            probe = await self.ds.execute(
                self.database,
                probe_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.setting("facet_suggest_time_limit_ms"),
                log_sql_errors=False,
            )
            parsed_dates = tuple(row[0] for row in probe.rows)
            if not any(parsed_dates):
                continue
            toggle_path = self.ds.urls.path(
                path_with_added_args(self.request, {"_facet_date": column})
            )
            suggestions.append(
                {
                    "name": column,
                    "type": "date",
                    "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                }
            )
        except (QueryInterrupted, sqlite3.OperationalError):
            continue
    return suggestions
async def suggest(self):
    """Suggest array facets for columns whose values are all JSON arrays.

    For each column not already configured as a facet, the distinct
    ``json_type`` values are fetched; the column is suggested when they are
    exactly ``("array",)`` or ``("array", None)``. Columns whose probe raises
    ``InterruptedError`` or ``sqlite3.OperationalError`` are skipped.

    Returns:
        A list of dicts with the keys "name", "type" (always "array") and
        "toggle_url".
    """
    candidate_columns = await self.get_columns(self.sql, self.params)
    suggestions = []
    enabled = [cfg["config"]["simple"] for cfg in self.get_configs()]
    accepted_type_sets = (("array",), ("array", None))
    for column in candidate_columns:
        if column in enabled:
            continue
        # Is every value in this column either null or a JSON array?
        probe_sql = """ select distinct json_type({column}) from ({sql}) """.format(
            column=escape_sqlite(column), sql=self.sql
        )
        try:
            probe = await self.ds.execute(
                self.database,
                probe_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.config("facet_suggest_time_limit_ms"),
                log_sql_errors=False,
            )
            json_types = tuple(row[0] for row in probe.rows)
            if json_types not in accepted_type_sets:
                continue
            toggle_path = path_with_added_args(self.request, {"_facet_array": column})
            suggestions.append(
                {
                    "name": column,
                    "type": "array",
                    "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                }
            )
        except (InterruptedError, sqlite3.OperationalError):
            continue
    return suggestions
async def suggest(self):
    """Suggest many-to-many facets derived from foreign key relationships.

    Looks at every table with a foreign key pointing at ``self.table``. When
    such a table has exactly two outgoing foreign keys it is treated as a
    join table, and the table on the other side of it is suggested as an
    ``m2m`` facet unless ``_facet_m2m=<that table>`` is already present in
    the query string.

    Join tables whose two foreign keys both point back at ``self.table``
    (a self-referential relationship) have no "other side" and are skipped
    instead of raising ``IndexError``.

    Returns:
        A list of dicts with the keys "name", "type" (always "m2m") and
        "toggle_url"; an empty list if no foreign key information exists for
        ``self.table``.
    """
    # This is calculated based on foreign key relationships to this table
    # Are there any many-to-many tables pointing here?
    suggested_facets = []
    all_foreign_keys = await self.ds.execute_against_connection_in_thread(
        self.database, get_all_foreign_keys
    )
    if not all_foreign_keys.get(self.table):
        # It's probably a view
        return []
    args = set(self.get_querystring_pairs())
    incoming = all_foreign_keys[self.table]["incoming"]
    # Do any of these incoming tables have exactly two outgoing keys?
    for fk in incoming:
        other_table = fk["other_table"]
        other_table_outgoing_foreign_keys = all_foreign_keys[other_table]["outgoing"]
        if len(other_table_outgoing_foreign_keys) != 2:
            continue
        destination_tables = [
            t["other_table"]
            for t in other_table_outgoing_foreign_keys
            if t["other_table"] != self.table
        ]
        if not destination_tables:
            # Both keys reference this table, so there is no destination
            # table to facet by.
            continue
        destination_table = destination_tables[0]
        # Only suggest if it's not selected already
        if ("_facet_m2m", destination_table) in args:
            continue
        suggested_facets.append(
            {
                "name": destination_table,
                "type": "m2m",
                "toggle_url": self.ds.absolute_url(
                    self.request,
                    path_with_added_args(
                        self.request, {"_facet_m2m": destination_table}
                    ),
                ),
            }
        )
    return suggested_facets
async def extra_template():
    """Build the extra template context for rendering custom SQL results.

    This is a closure: ``results``, ``request``, ``database``, ``sql``,
    ``params``, ``metadata``, ``named_parameter_values``, ``editable``,
    ``canned_query``, ``allow_execute_sql`` and ``self`` all come from the
    enclosing scope.

    Each cell is offered to the ``render_cell`` plugin hook first; the first
    non-``None`` result wins. Otherwise empty values become a non-breaking
    space, URL-looking values become links and ``bytes`` values become a
    download link labelled with their size.

    Returns:
        A dict of template variables, including the display rows, the
        "Edit SQL" URL (or ``None``) and the show/hide-SQL link state.
    """
    display_rows = []
    for row in results.rows if results else []:
        display_row = []
        for column, value in zip(results.columns, row):
            display_value = value
            # Let the plugins have a go
            # pylint: disable=no-member
            plugin_display_value = None
            for candidate in pm.hook.render_cell(
                value=value,
                column=column,
                table=None,
                database=database,
                datasette=self.ds,
            ):
                candidate = await await_me_maybe(candidate)
                if candidate is not None:
                    plugin_display_value = candidate
                    break
            if plugin_display_value is not None:
                display_value = plugin_display_value
            elif value in ("", None):
                # Explicit entity: a non-breaking space keeps the empty cell
                # from collapsing and is visible to readers of this code.
                display_value = Markup("&nbsp;")
            elif is_url(str(display_value).strip()):
                display_value = Markup(
                    '<a href="{url}">{url}</a>'.format(url=escape(value.strip()))
                )
            elif isinstance(display_value, bytes):
                blob_url = path_with_format(
                    request=request,
                    format="blob",
                    extra_qs={
                        "_blob_column": column,
                        "_blob_hash": hashlib.sha256(display_value).hexdigest(),
                    },
                )
                # The angle brackets around the label must be entities: inside
                # Markup a literal "<Binary: ...>" would be parsed by the
                # browser as an unknown tag and the link text would vanish.
                display_value = Markup(
                    '<a class="blob-download" href="{}">&lt;Binary: {} byte{}&gt;</a>'.format(
                        blob_url,
                        len(display_value),
                        "" if len(value) == 1 else "s",
                    )
                )
            display_row.append(display_value)
        display_rows.append(display_row)

    # Show 'Edit SQL' button only if:
    # - User is allowed to execute SQL
    # - SQL is an approved SELECT statement
    # - No magic parameters, so no :_ in the SQL string
    edit_sql_url = None
    try:
        validate_sql_select(sql)
    except InvalidSql:
        is_validated_sql = False
    else:
        is_validated_sql = True
    if allow_execute_sql and is_validated_sql and ":_" not in sql:
        edit_sql_url = (
            self.ds.urls.database(database)
            + "?"
            + urlencode({"sql": sql, **named_parameter_values})
        )

    # Work out the show/hide SQL toggle. When metadata sets "hide_sql" the
    # SQL is hidden unless ?_show_sql is given; otherwise it is shown unless
    # ?_hide_sql is given. The hidden input carries the current override.
    show_hide_hidden = ""
    if metadata.get("hide_sql"):
        if bool(params.get("_show_sql")):
            show_hide_link = path_with_removed_args(request, {"_show_sql"})
            show_hide_text = "hide"
            show_hide_hidden = '<input type="hidden" name="_show_sql" value="1">'
        else:
            show_hide_link = path_with_added_args(request, {"_show_sql": 1})
            show_hide_text = "show"
    else:
        if bool(params.get("_hide_sql")):
            show_hide_link = path_with_removed_args(request, {"_hide_sql"})
            show_hide_text = "show"
            show_hide_hidden = '<input type="hidden" name="_hide_sql" value="1">'
        else:
            show_hide_link = path_with_added_args(request, {"_hide_sql": 1})
            show_hide_text = "hide"
    # The SQL is currently hidden exactly when the link offers to show it.
    hide_sql = show_hide_text == "show"
    return {
        "display_rows": display_rows,
        "custom_sql": True,
        "named_parameter_values": named_parameter_values,
        "editable": editable,
        "canned_query": canned_query,
        "edit_sql_url": edit_sql_url,
        "metadata": metadata,
        "settings": self.ds.settings_dict(),
        "request": request,
        "show_hide_link": show_hide_link,
        "show_hide_text": show_hide_text,
        "show_hide_hidden": markupsafe.Markup(show_hide_hidden),
        "hide_sql": hide_sql,
    }
async def facet_results(self):
    """Compute the value counts for every configured facet column.

    For each facet config, the most common non-null values of the column are
    fetched (``default_facet_size + 1`` rows, so truncation can be detected).
    When ``self.table`` is set, values are passed to
    ``self.ds.expand_foreign_keys`` to obtain labels. Each value gets a toggle
    URL that adds or removes ``column=value`` from the query string depending
    on whether it is currently selected.

    A facet whose query raises ``InterruptedError`` is recorded as timed out.
    Its entry is stored before rows are processed, so an interruption after
    the first query leaves a partially filled entry in place.

    Returns:
        A ``(results, timed_out)`` tuple: a dict keyed by column name and a
        list of column names whose facet timed out.
    """
    results_by_column = {}
    timed_out = []
    qs_pairs = self.get_querystring_pairs()
    facet_size = self.ds.config("default_facet_size")
    for entry in self.get_configs():
        config = entry["config"]
        source = entry["source"]
        column = config.get("column") or config["simple"]
        facet_sql = """ select {col} as value, count(*) as count from ( {sql} ) where {col} is not null group by {col} order by count desc limit {limit} """.format(
            col=escape_sqlite(column), sql=self.sql, limit=facet_size + 1
        )
        try:
            executed = await self.ds.execute(
                self.database,
                facet_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.config("facet_time_limit_ms"),
            )
            value_entries = []
            # Register the facet first; value_entries is filled in below.
            results_by_column[column] = {
                "name": column,
                "type": self.type,
                "hideable": source != "metadata",
                "toggle_url": path_with_removed_args(self.request, {"_facet": column}),
                "results": value_entries,
                "truncated": len(executed) > facet_size,
            }
            page = executed.rows[:facet_size]
            expanded = {}
            if self.table:
                # Attempt to expand foreign keys into labels
                raw_values = [row["value"] for row in page]
                expanded = await self.ds.expand_foreign_keys(
                    self.database, self.table, column, raw_values
                )
            for row in page:
                selected = (column, str(row["value"])) in qs_pairs
                toggle_path = (
                    path_with_removed_args(self.request, {column: str(row["value"])})
                    if selected
                    else path_with_added_args(self.request, {column: row["value"]})
                )
                value_entries.append(
                    {
                        "value": row["value"],
                        "label": expanded.get((column, row["value"]), row["value"]),
                        "count": row["count"],
                        "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                        "selected": selected,
                    }
                )
        except InterruptedError:
            timed_out.append(column)
    return results_by_column, timed_out
async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):
    """Handle a GET request and render it in the requested output format.

    The format comes from ``self.get_format``. CSV is delegated to
    ``self.as_csv``; ``jsono`` redirects to the ``.json`` path with
    ``_shape=objects``; formats registered in ``self.ds.renderers`` are
    rendered by the registered callable; everything else is rendered
    through ``self.render`` with a template context.

    Args:
        request: The incoming request.
        database: Database name passed through to ``self.data``.
        hash: Hash passed through to ``self.data``.
        correct_hash_provided: Selects which default cache TTL setting is
            used when the request has no numeric ``_ttl`` argument.
        **kwargs: Extra arguments forwarded to ``self.data`` / ``self.as_csv``.

    Returns:
        A response object. If ``self.data`` itself returns a ``Response`` it
        is returned unchanged.

    Raises:
        DatasetteError: With status 400 when the query is interrupted or
            raises ``sqlite3.OperationalError`` / ``InvalidSql``.
        NotFound: When a registered renderer returns ``None``.
    """
    _format, kwargs = await self.get_format(request, database, kwargs)

    if _format == "csv":
        return await self.as_csv(request, database, hash, **kwargs)

    if _format is None:
        # HTML views default to expanding all foreign key labels
        kwargs["default_labels"] = True

    extra_template_data = {}
    start = time.time()
    status_code = 200
    templates = []
    try:
        response_or_template_contexts = await self.data(
            request, database, hash, **kwargs
        )
        if isinstance(response_or_template_contexts, Response):
            return response_or_template_contexts
        data, extra_template_data, templates = response_or_template_contexts
    except QueryInterrupted:
        # NOTE(review): "messagge_is_html" is spelled as the callee is
        # assumed to expect it - confirm against DatasetteError's signature
        # before renaming.
        raise DatasetteError(
            """ SQL query took too long. The time limit is controlled by the <a href="https://datasette.readthedocs.io/en/stable/config.html#sql-time-limit-ms">sql_time_limit_ms</a> configuration option. """,
            title="SQL Interrupted",
            status=400,
            messagge_is_html=True,
        )
    except (sqlite3.OperationalError, InvalidSql) as e:
        # Any other exception, DatasetteError included, propagates unchanged.
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    end = time.time()
    data["query_ms"] = (end - start) * 1000
    for key in ("source", "source_url", "license", "license_url"):
        value = self.ds.metadata(key)
        if value:
            data[key] = value

    # Special case for .jsono extension - redirect to _shape=objects
    if _format == "jsono":
        return self.redirect(
            request,
            path_with_added_args(
                request,
                {"_shape": "objects"},
                path=request.path.rsplit(".jsono", 1)[0] + ".json",
            ),
            forward_querystring=False,
        )

    if _format in self.ds.renderers.keys():
        # Dispatch request to the correct output format renderer
        # (CSV is not handled here due to streaming)
        result = self.ds.renderers[_format](request.args, data, self.name)
        if result is None:
            raise NotFound("No data")
        r = Response(
            body=result.get("body"),
            status=result.get("status_code", 200),
            content_type=result.get("content_type", "text/plain"),
        )
    else:
        # extra_template_data may be a plain dict, a function returning one,
        # or a coroutine function.
        extras = {}
        if callable(extra_template_data):
            extras = extra_template_data()
            if asyncio.iscoroutine(extras):
                extras = await extras
        else:
            extras = extra_template_data
        url_labels_extra = {}
        if data.get("expandable_columns"):
            url_labels_extra = {"_labels": "on"}
        renderers = {
            key: path_with_format(request, key, {**url_labels_extra})
            for key in self.ds.renderers.keys()
        }
        url_csv_args = {"_size": "max", **url_labels_extra}
        url_csv = path_with_format(request, "csv", url_csv_args)
        url_csv_path = url_csv.split("?")[0]
        context = {
            **data,
            **extras,
            **{
                "renderers": renderers,
                "url_csv": url_csv,
                "url_csv_path": url_csv_path,
                # Forward the current query string to the CSV form, except
                # the arguments the form sets itself.
                "url_csv_hidden_args": [
                    (key, value)
                    for key, value in urllib.parse.parse_qsl(request.query_string)
                    if key not in ("_labels", "_facet", "_size")
                ]
                + [("_size", "max")],
                "datasette_version": __version__,
                "config": self.ds.config_dict(),
            },
        }
        if "metadata" not in context:
            context["metadata"] = self.ds.metadata
        r = await self.render(templates, request=request, context=context)
        # NOTE(review): placed in the template branch so a renderer's own
        # status_code is kept - confirm against the original indentation.
        r.status = status_code

    ttl = request.args.get("_ttl", None)
    if ttl is None or not ttl.isdigit():
        if correct_hash_provided:
            ttl = self.ds.config("default_cache_ttl_hashed")
        else:
            ttl = self.ds.config("default_cache_ttl")
    return self.set_response_headers(r, ttl)
async def data(self, request, name, hash, table):
    """Build the data and template context for a table or view page.

    The URL-decoded ``table`` is first checked against the canned queries of
    database ``name``; a match is delegated to ``self.custom_sql``. Otherwise
    the table/view is looked up in ``sqlite_master`` and a paginated
    ``select`` is assembled from the query string: column filters,
    ``_search``/``_search_<col>``, ``_sort``/``_sort_desc``, ``_next``,
    ``_size``, ``_timelimit``, ``_group_count`` and ``_facet``.

    Returns:
        Either the result of ``self.redirect`` / ``self.custom_sql``, or a
        3-tuple ``(data_dict, extra_template, template_names)`` where
        ``extra_template`` is a coroutine function producing additional
        template variables.

    Raises:
        NotFound: If ``table`` is neither a view nor a table.
        DatasetteError: For invalid search columns, sort columns or ``_size``.
    """
    table = urllib.parse.unquote_plus(table)
    canned_query = self.ds.get_canned_query(name, table)
    if canned_query is not None:
        return await self.custom_sql(
            request,
            name,
            hash,
            canned_query["sql"],
            editable=False,
            canned_query=table,
        )
    is_view = bool(
        list(await self.execute(
            name,
            "SELECT count(*) from sqlite_master WHERE type = 'view' and name=:n",
            {"n": table},
        ))[0][0])
    view_definition = None
    table_definition = None
    if is_view:
        view_definition = list(await self.execute(
            name,
            'select sql from sqlite_master where name = :n and type="view"',
            {"n": table},
        ))[0][0]
    else:
        table_definition_rows = list(await self.execute(
            name,
            'select sql from sqlite_master where name = :n and type="table"',
            {"n": table},
        ))
        if not table_definition_rows:
            raise NotFound("Table not found: {}".format(table))
        table_definition = table_definition_rows[0][0]
    info = self.ds.inspect()
    table_info = info[name]["tables"].get(table) or {}
    pks = table_info.get("primary_keys") or []
    # Tables without declared primary keys are addressed by rowid; views
    # are never addressed by rowid here.
    use_rowid = not pks and not is_view
    if use_rowid:
        select = "rowid, *"
        order_by = "rowid"
        order_by_pks = "rowid"
    else:
        select = "*"
        order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
        order_by = order_by_pks
    if is_view:
        order_by = ""
    # We roll our own query_string decoder because by default Sanic
    # drops anything with an empty value e.g. ?name__exact=
    args = RequestParameters(
        urllib.parse.parse_qs(request.query_string, keep_blank_values=True))
    # Special args start with _ and do not contain a __
    # That's so if there is a column that starts with _
    # it can still be queried using ?_col__exact=blah
    special_args = {}
    special_args_lists = {}
    other_args = {}
    for key, value in args.items():
        if key.startswith("_") and "__" not in key:
            # Keep both the first value and the full list of values.
            special_args[key] = value[0]
            special_args_lists[key] = value
        else:
            other_args[key] = value[0]
    # Handle ?_filter_column and redirect, if present
    redirect_params = filters_should_redirect(special_args)
    if redirect_params:
        return self.redirect(
            request,
            path_with_added_args(request, redirect_params),
            forward_querystring=False,
        )
    # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
    if "_sort_by_desc" in special_args:
        return self.redirect(
            request,
            path_with_added_args(
                request,
                {
                    "_sort_desc": special_args.get("_sort"),
                    "_sort_by_desc": None,
                    "_sort": None,
                },
            ),
            forward_querystring=False,
        )
    table_metadata = self.table_metadata(name, table)
    units = table_metadata.get("units", {})
    filters = Filters(sorted(other_args.items()), units, ureg)
    where_clauses, params = filters.build_where_clauses()
    # _search support:
    fts_table = info[name]["tables"].get(table, {}).get("fts_table")
    search_args = dict(pair for pair in special_args.items()
                       if pair[0].startswith("_search"))
    search_descriptions = []
    search = ""
    if fts_table and search_args:
        if "_search" in search_args:
            # Simple ?_search=xxx
            search = search_args["_search"]
            where_clauses.append(
                "rowid in (select rowid from [{fts_table}] where [{fts_table}] match :search)"
                .format(fts_table=fts_table))
            search_descriptions.append(
                'search matches "{}"'.format(search))
            params["search"] = search
        else:
            # More complex: search against specific columns
            valid_columns = set(info[name]["tables"][fts_table]["columns"])
            for i, (key, search_text) in enumerate(search_args.items()):
                search_col = key.split("_search_", 1)[1]
                # search_col is interpolated into the SQL below, so it must
                # be one of the FTS table's known columns.
                if search_col not in valid_columns:
                    raise DatasetteError("Cannot search by that column",
                                         status=400)
                where_clauses.append(
                    "rowid in (select rowid from [{fts_table}] where [{search_col}] match :search_{i})"
                    .format(fts_table=fts_table, search_col=search_col, i=i))
                search_descriptions.append(
                    'search column "{}" matches "{}"'.format(
                        search_col, search_text))
                params["search_{}".format(i)] = search_text
    table_rows_count = None
    sortable_columns = set()
    if not is_view:
        table_rows_count = table_info["count"]
        sortable_columns = self.sortable_columns_for_table(
            name, table, use_rowid)
    # Allow for custom sort order
    sort = special_args.get("_sort")
    if sort:
        if sort not in sortable_columns:
            raise DatasetteError("Cannot sort table by {}".format(sort))
        order_by = escape_sqlite(sort)
    sort_desc = special_args.get("_sort_desc")
    if sort_desc:
        if sort_desc not in sortable_columns:
            raise DatasetteError(
                "Cannot sort table by {}".format(sort_desc))
        if sort:
            raise DatasetteError(
                "Cannot use _sort and _sort_desc at the same time")
        order_by = "{} desc".format(escape_sqlite(sort_desc))
    # from_sql / count_sql are built before the pagination clauses are
    # added, so the count and the facets cover the whole filtered set.
    from_sql = "from {table_name} {where}".format(
        table_name=escape_sqlite(table),
        where=("where {} ".format(" and ".join(where_clauses)))
        if where_clauses else "",
    )
    count_sql = "select count(*) {}".format(from_sql)
    _next = special_args.get("_next")
    offset = ""
    if _next:
        if is_view:
            # _next is an offset
            offset = " offset {}".format(int(_next))
        else:
            components = urlsafe_components(_next)
            # If a sort order is applied, the first of these is the sort value
            if sort or sort_desc:
                sort_value = components[0]
                # Special case for if non-urlencoded first token was $null
                if _next.split(",")[0] == "$null":
                    sort_value = None
                components = components[1:]
            # Figure out the SQL for next-based-on-primary-key first
            next_by_pk_clauses = []
            if use_rowid:
                # The parameter name is derived from the current size of
                # params, so the append must happen before the assignment.
                next_by_pk_clauses.append("rowid > :p{}".format(
                    len(params)))
                params["p{}".format(len(params))] = components[0]
            else:
                # Apply the tie-breaker based on primary keys
                if len(components) == len(pks):
                    param_len = len(params)
                    next_by_pk_clauses.append(
                        compound_keys_after_sql(pks, param_len))
                    for i, pk_value in enumerate(components):
                        params["p{}".format(param_len + i)] = pk_value
            # Now add the sort SQL, which may incorporate next_by_pk_clauses
            if sort or sort_desc:
                if sort_value is None:
                    if sort_desc:
                        # Just items where column is null ordered by pk
                        where_clauses.append(
                            "({column} is null and {next_clauses})".format(
                                column=escape_sqlite(sort_desc),
                                next_clauses=" and ".join(
                                    next_by_pk_clauses),
                            ))
                    else:
                        where_clauses.append(
                            "({column} is not null or ({column} is null and {next_clauses}))"
                            .format(
                                column=escape_sqlite(sort),
                                next_clauses=" and ".join(
                                    next_by_pk_clauses),
                            ))
                else:
                    where_clauses.append(
                        "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))"
                        .format(
                            column=escape_sqlite(sort or sort_desc),
                            op=">" if sort else "<",
                            p=len(params),
                            extra_desc_only="" if sort else
                            " or {column2} is null".format(
                                column2=escape_sqlite(sort or sort_desc)),
                            next_clauses=" and ".join(next_by_pk_clauses),
                        ))
                    params["p{}".format(len(params))] = sort_value
                # Primary keys act as the tie-breaker after the sort column.
                order_by = "{}, {}".format(order_by, order_by_pks)
            else:
                where_clauses.extend(next_by_pk_clauses)
    where_clause = ""
    if where_clauses:
        where_clause = "where {} ".format(" and ".join(where_clauses))
    if order_by:
        order_by = "order by {} ".format(order_by)
    # _group_count=col1&_group_count=col2
    group_count = special_args_lists.get("_group_count") or []
    if group_count:
        # NOTE(review): the column names come from the query string and are
        # only wrapped in double quotes here - confirm whether they should
        # go through escape_sqlite like the other identifiers.
        sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
            group_cols=", ".join('"{}"'.format(group_count_col)
                                 for group_count_col in group_count),
            table_name=escape_sqlite(table),
            where=where_clause,
        )
        return await self.custom_sql(request,
                                     name,
                                     hash,
                                     sql,
                                     editable=True)
    extra_args = {}
    # Handle ?_page_size=500
    page_size = request.raw_args.get("_size")
    if page_size:
        if page_size == "max":
            page_size = self.max_returned_rows
        try:
            page_size = int(page_size)
            if page_size < 0:
                raise ValueError
        except ValueError:
            raise DatasetteError("_size must be a positive integer",
                                 status=400)
        if page_size > self.max_returned_rows:
            raise DatasetteError("_size must be <= {}".format(
                self.max_returned_rows),
                                 status=400)
        extra_args["page_size"] = page_size
    else:
        page_size = self.page_size
    # One more row than the page size is requested so the presence of a
    # next page can be detected below.
    sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
        select=select,
        table_name=escape_sqlite(table),
        where=where_clause,
        order_by=order_by,
        limit=page_size + 1,
        offset=offset,
    )
    if request.raw_args.get("_timelimit"):
        extra_args["custom_time_limit"] = int(
            request.raw_args["_timelimit"])
    rows, truncated, description = await self.execute(name,
                                                      sql,
                                                      params,
                                                      truncate=True,
                                                      **extra_args)
    # facets support
    try:
        facets = request.args["_facet"]
    except KeyError:
        facets = table_metadata.get("facets", [])
    facet_results = {}
    for column in facets:
        facet_sql = """ select {col} as value, count(*) as count {from_sql} group by {col} order by count desc limit 20 """.format(col=escape_sqlite(column), from_sql=from_sql)
        try:
            # NOTE(review): params may contain pagination parameters added
            # after from_sql was built - confirm the extra entries are
            # harmless for this query.
            facet_rows = await self.execute(name,
                                            facet_sql,
                                            params,
                                            truncate=False,
                                            custom_time_limit=200)
            facet_results[column] = [{
                "value":
                row["value"],
                "count":
                row["count"],
                "toggle_url":
                urllib.parse.urljoin(
                    request.url,
                    path_with_added_args(request,
                                         {column: row["value"]}),
                ),
            } for row in facet_rows]
        except sqlite3.OperationalError:
            # Hit time limit
            pass
    columns = [r[0] for r in description]
    rows = list(rows)
    filter_columns = columns[:]
    if use_rowid and filter_columns[0] == "rowid":
        filter_columns = filter_columns[1:]
    # Pagination next link
    next_value = None
    next_url = None
    if len(rows) > page_size and page_size > 0:
        if is_view:
            next_value = int(_next or 0) + page_size
        else:
            # rows[-2] is the last row of the current page; rows[-1] is
            # the extra look-ahead row.
            next_value = path_from_row_pks(rows[-2], pks, use_rowid)
        # If there's a sort or sort_desc, add that value as a prefix
        if (sort or sort_desc) and not is_view:
            prefix = rows[-2][sort or sort_desc]
            if prefix is None:
                prefix = "$null"
            else:
                prefix = urllib.parse.quote_plus(str(prefix))
            next_value = "{},{}".format(prefix, next_value)
            added_args = {"_next": next_value}
            if sort:
                added_args["_sort"] = sort
            else:
                added_args["_sort_desc"] = sort_desc
        else:
            added_args = {"_next": next_value}
        next_url = urllib.parse.urljoin(
            request.url, path_with_added_args(request, added_args))
        rows = rows[:page_size]
    # Number of filtered rows in whole set:
    filtered_table_rows_count = None
    if count_sql:
        try:
            count_rows = list(await self.execute(name, count_sql, params))
            filtered_table_rows_count = count_rows[0][0]
        except sqlite3.OperationalError:
            # Almost certainly hit the timeout
            pass
    # human_description_en combines filters AND search, if provided
    human_description_en = filters.human_description_en(
        extra=search_descriptions)
    if sort or sort_desc:
        sorted_by = "sorted by {}{}".format(
            (sort or sort_desc), " descending" if sort_desc else "")
        human_description_en = " ".join(
            [b for b in [human_description_en, sorted_by] if b])

    async def extra_template():
        # Template-only context; the display rows are computed here rather
        # than in the enclosing function body.
        display_columns, display_rows = await self.display_columns_and_rows(
            name,
            table,
            description,
            rows,
            link_column=not is_view,
            expand_foreign_keys=True,
        )
        metadata = self.ds.metadata.get("databases",
                                        {}).get(name,
                                                {}).get("tables",
                                                        {}).get(table, {})
        self.ds.update_with_inherited_metadata(metadata)
        return {
            "database_hash":
            hash,
            "supports_search":
            bool(fts_table),
            "search":
            search or "",
            "use_rowid":
            use_rowid,
            "filters":
            filters,
            "display_columns":
            display_columns,
            "filter_columns":
            filter_columns,
            "display_rows":
            display_rows,
            "is_sortable":
            any(c["sortable"] for c in display_columns),
            "path_with_added_args":
            path_with_added_args,
            "request":
            request,
            "sort":
            sort,
            "sort_desc":
            sort_desc,
            "disable_sort":
            is_view,
            "custom_rows_and_columns_templates": [
                "_rows_and_columns-{}-{}.html".format(
                    to_css_class(name), to_css_class(table)),
                "_rows_and_columns-table-{}-{}.html".format(
                    to_css_class(name), to_css_class(table)),
                "_rows_and_columns.html",
            ],
            "metadata":
            metadata,
        }

    return {
        "database": name,
        "table": table,
        "is_view": is_view,
        "view_definition": view_definition,
        "table_definition": table_definition,
        "human_description_en": human_description_en,
        "rows": rows[:page_size],
        "truncated": truncated,
        "table_rows_count": table_rows_count,
        "filtered_table_rows_count": filtered_table_rows_count,
        "columns": columns,
        "primary_keys": pks,
        "units": units,
        "query": {
            "sql": sql,
            "params": params
        },
        "facet_results": facet_results,
        "next": next_value and str(next_value) or None,
        "next_url": next_url,
    }, extra_template, (
        "table-{}-{}.html".format(to_css_class(name), to_css_class(table)),
        "table.html",
    )
async def data( self, request, database, hash, table, default_labels=False, _next=None, _size=None, ): canned_query = await self.ds.get_canned_query(database, table, request.actor) if canned_query: return await QueryView(self.ds).data( request, database, hash, canned_query["sql"], metadata=canned_query, editable=False, canned_query=table, named_parameters=canned_query.get("params"), write=bool(canned_query.get("write")), ) db = self.ds.databases[database] is_view = bool(await db.get_view_definition(table)) table_exists = bool(await db.table_exists(table)) if not is_view and not table_exists: raise NotFound("Table not found: {}".format(table)) await self.check_permission(request, "view-instance") await self.check_permission(request, "view-database", database) await self.check_permission(request, "view-table", (database, table)) private = not await self.ds.permission_allowed( None, "view-table", (database, table), default=True ) pks = await db.primary_keys(table) table_columns = await db.table_columns(table) select_columns = ", ".join(escape_sqlite(t) for t in table_columns) use_rowid = not pks and not is_view if use_rowid: select = "rowid, {}".format(select_columns) order_by = "rowid" order_by_pks = "rowid" else: select = select_columns order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) order_by = order_by_pks if is_view: order_by = "" # Ensure we don't drop anything with an empty value e.g. 
?name__exact= args = MultiParams( urllib.parse.parse_qs(request.query_string, keep_blank_values=True) ) # Special args start with _ and do not contain a __ # That's so if there is a column that starts with _ # it can still be queried using ?_col__exact=blah special_args = {} other_args = [] for key in args: if key.startswith("_") and "__" not in key: special_args[key] = args[key] else: for v in args.getlist(key): other_args.append((key, v)) # Handle ?_filter_column and redirect, if present redirect_params = filters_should_redirect(special_args) if redirect_params: return self.redirect( request, path_with_added_args(request, redirect_params), forward_querystring=False, ) # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort) if "_sort_by_desc" in special_args: return self.redirect( request, path_with_added_args( request, { "_sort_desc": special_args.get("_sort"), "_sort_by_desc": None, "_sort": None, }, ), forward_querystring=False, ) table_metadata = self.ds.table_metadata(database, table) units = table_metadata.get("units", {}) filters = Filters(sorted(other_args), units, ureg) where_clauses, params = filters.build_where_clauses(table) extra_wheres_for_ui = [] # Add _where= from querystring if "_where" in request.args: if not await self.ds.permission_allowed( request.actor, "execute-sql", resource=database, default=True, ): raise DatasetteError("_where= is not allowed", status=403) else: where_clauses.extend(request.args.getlist("_where")) extra_wheres_for_ui = [ { "text": text, "remove_url": path_with_removed_args(request, {"_where": text}), } for text in request.args.getlist("_where") ] # Support for ?_through={table, column, value} extra_human_descriptions = [] if "_through" in request.args: for through in request.args.getlist("_through"): through_data = json.loads(through) through_table = through_data["table"] other_column = through_data["column"] value = through_data["value"] outgoing_foreign_keys = await db.foreign_keys_for_table(through_table) try: 
fk_to_us = [ fk for fk in outgoing_foreign_keys if fk["other_table"] == table ][0] except IndexError: raise DatasetteError( "Invalid _through - could not find corresponding foreign key" ) param = "p{}".format(len(params)) where_clauses.append( "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})".format( through_table=escape_sqlite(through_table), our_pk=escape_sqlite(fk_to_us["other_column"]), our_column=escape_sqlite(fk_to_us["column"]), other_column=escape_sqlite(other_column), param=param, ) ) params[param] = value extra_human_descriptions.append( '{}.{} = "{}"'.format(through_table, other_column, value) ) # _search support: fts_table = special_args.get("_fts_table") fts_table = fts_table or table_metadata.get("fts_table") fts_table = fts_table or await db.fts_table(table) fts_pk = special_args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) search_args = dict( pair for pair in special_args.items() if pair[0].startswith("_search") ) search = "" search_mode_raw = special_args.get("_searchmode") == "raw" if fts_table and search_args: if "_search" in search_args: # Simple ?_search=xxx search = search_args["_search"] where_clauses.append( "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})".format( fts_table=escape_sqlite(fts_table), fts_pk=escape_sqlite(fts_pk), match_clause=":search" if search_mode_raw else "escape_fts(:search)", ) ) extra_human_descriptions.append('search matches "{}"'.format(search)) params["search"] = search else: # More complex: search against specific columns for i, (key, search_text) in enumerate(search_args.items()): search_col = key.split("_search_", 1)[1] if search_col not in await db.table_columns(fts_table): raise DatasetteError("Cannot search by that column", status=400) where_clauses.append( "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})".format( fts_table=escape_sqlite(fts_table), search_col=escape_sqlite(search_col), 
match_clause=":search_{}".format(i) if search_mode_raw else "escape_fts(:search_{})".format(i), ) ) extra_human_descriptions.append( 'search column "{}" matches "{}"'.format( search_col, search_text ) ) params["search_{}".format(i)] = search_text sortable_columns = set() sortable_columns = await self.sortable_columns_for_table( database, table, use_rowid ) # Allow for custom sort order sort = special_args.get("_sort") sort_desc = special_args.get("_sort_desc") if not sort and not sort_desc: sort = table_metadata.get("sort") sort_desc = table_metadata.get("sort_desc") if sort and sort_desc: raise DatasetteError("Cannot use _sort and _sort_desc at the same time") if sort: if sort not in sortable_columns: raise DatasetteError("Cannot sort table by {}".format(sort)) order_by = escape_sqlite(sort) if sort_desc: if sort_desc not in sortable_columns: raise DatasetteError("Cannot sort table by {}".format(sort_desc)) order_by = "{} desc".format(escape_sqlite(sort_desc)) from_sql = "from {table_name} {where}".format( table_name=escape_sqlite(table), where=("where {} ".format(" and ".join(where_clauses))) if where_clauses else "", ) # Copy of params so we can mutate them later: from_sql_params = dict(**params) count_sql = "select count(*) {}".format(from_sql) _next = _next or special_args.get("_next") offset = "" if _next: if is_view: # _next is an offset offset = " offset {}".format(int(_next)) else: components = urlsafe_components(_next) # If a sort order is applied, the first of these is the sort value if sort or sort_desc: sort_value = components[0] # Special case for if non-urlencoded first token was $null if _next.split(",")[0] == "$null": sort_value = None components = components[1:] # Figure out the SQL for next-based-on-primary-key first next_by_pk_clauses = [] if use_rowid: next_by_pk_clauses.append("rowid > :p{}".format(len(params))) params["p{}".format(len(params))] = components[0] else: # Apply the tie-breaker based on primary keys if len(components) == len(pks): 
param_len = len(params) next_by_pk_clauses.append( compound_keys_after_sql(pks, param_len) ) for i, pk_value in enumerate(components): params["p{}".format(param_len + i)] = pk_value # Now add the sort SQL, which may incorporate next_by_pk_clauses if sort or sort_desc: if sort_value is None: if sort_desc: # Just items where column is null ordered by pk where_clauses.append( "({column} is null and {next_clauses})".format( column=escape_sqlite(sort_desc), next_clauses=" and ".join(next_by_pk_clauses), ) ) else: where_clauses.append( "({column} is not null or ({column} is null and {next_clauses}))".format( column=escape_sqlite(sort), next_clauses=" and ".join(next_by_pk_clauses), ) ) else: where_clauses.append( "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format( column=escape_sqlite(sort or sort_desc), op=">" if sort else "<", p=len(params), extra_desc_only="" if sort else " or {column2} is null".format( column2=escape_sqlite(sort or sort_desc) ), next_clauses=" and ".join(next_by_pk_clauses), ) ) params["p{}".format(len(params))] = sort_value order_by = "{}, {}".format(order_by, order_by_pks) else: where_clauses.extend(next_by_pk_clauses) where_clause = "" if where_clauses: where_clause = "where {} ".format(" and ".join(where_clauses)) if order_by: order_by = "order by {} ".format(order_by) extra_args = {} # Handle ?_size=500 page_size = _size or request.args.get("_size") or table_metadata.get("size") if page_size: if page_size == "max": page_size = self.ds.max_returned_rows try: page_size = int(page_size) if page_size < 0: raise ValueError except ValueError: raise DatasetteError("_size must be a positive integer", status=400) if page_size > self.ds.max_returned_rows: raise DatasetteError( "_size must be <= {}".format(self.ds.max_returned_rows), status=400 ) extra_args["page_size"] = page_size else: page_size = self.ds.page_size sql_no_limit = "select {select} from {table_name} {where}{order_by}".format( select=select, 
table_name=escape_sqlite(table), where=where_clause, order_by=order_by, ) sql = "{sql_no_limit} limit {limit}{offset}".format( sql_no_limit=sql_no_limit.rstrip(), limit=page_size + 1, offset=offset ) if request.args.get("_timelimit"): extra_args["custom_time_limit"] = int(request.args.get("_timelimit")) results = await db.execute(sql, params, truncate=True, **extra_args) # Number of filtered rows in whole set: filtered_table_rows_count = None if ( not db.is_mutable and self.ds.inspect_data and count_sql == "select count(*) from {} ".format(table) ): try: filtered_table_rows_count = self.ds.inspect_data[database]["tables"][ table ]["count"] except KeyError: pass if count_sql and filtered_table_rows_count is None: try: count_rows = list(await db.execute(count_sql, from_sql_params)) filtered_table_rows_count = count_rows[0][0] except QueryInterrupted: pass # facets support if not self.ds.config("allow_facet") and any( arg.startswith("_facet") for arg in request.args ): raise DatasetteError("_facet= is not allowed", status=400) # pylint: disable=no-member facet_classes = list( itertools.chain.from_iterable(pm.hook.register_facet_classes()) ) facet_results = {} facets_timed_out = [] facet_instances = [] for klass in facet_classes: facet_instances.append( klass( self.ds, request, database, sql=sql_no_limit, params=params, table=table, metadata=table_metadata, row_count=filtered_table_rows_count, ) ) for facet in facet_instances: ( instance_facet_results, instance_facets_timed_out, ) = await facet.facet_results() facet_results.update(instance_facet_results) facets_timed_out.extend(instance_facets_timed_out) # Figure out columns and rows for the query columns = [r[0] for r in results.description] rows = list(results.rows) # Expand labeled columns if requested expanded_columns = [] expandable_columns = await self.expandable_columns(database, table) columns_to_expand = None try: all_labels = value_as_boolean(special_args.get("_labels", "")) except ValueError: all_labels = 
default_labels # Check for explicit _label= if "_label" in request.args: columns_to_expand = request.args.getlist("_label") if columns_to_expand is None and all_labels: # expand all columns with foreign keys columns_to_expand = [fk["column"] for fk, _ in expandable_columns] if columns_to_expand: expanded_labels = {} for fk, _ in expandable_columns: column = fk["column"] if column not in columns_to_expand: continue expanded_columns.append(column) # Gather the values column_index = columns.index(column) values = [row[column_index] for row in rows] # Expand them expanded_labels.update( await self.ds.expand_foreign_keys(database, table, column, values) ) if expanded_labels: # Rewrite the rows new_rows = [] for row in rows: new_row = CustomRow(columns) for column in row.keys(): value = row[column] if (column, value) in expanded_labels and value is not None: new_row[column] = { "value": value, "label": expanded_labels[(column, value)], } else: new_row[column] = value new_rows.append(new_row) rows = new_rows # Pagination next link next_value = None next_url = None if len(rows) > page_size and page_size > 0: if is_view: next_value = int(_next or 0) + page_size else: next_value = path_from_row_pks(rows[-2], pks, use_rowid) # If there's a sort or sort_desc, add that value as a prefix if (sort or sort_desc) and not is_view: prefix = rows[-2][sort or sort_desc] if isinstance(prefix, dict) and "value" in prefix: prefix = prefix["value"] if prefix is None: prefix = "$null" else: prefix = urllib.parse.quote_plus(str(prefix)) next_value = "{},{}".format(prefix, next_value) added_args = {"_next": next_value} if sort: added_args["_sort"] = sort else: added_args["_sort_desc"] = sort_desc else: added_args = {"_next": next_value} next_url = self.ds.absolute_url( request, path_with_replaced_args(request, added_args) ) rows = rows[:page_size] # Detect suggested facets suggested_facets = [] if ( self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next ): for facet 
in facet_instances: suggested_facets.extend(await facet.suggest()) # human_description_en combines filters AND search, if provided human_description_en = filters.human_description_en( extra=extra_human_descriptions ) if sort or sort_desc: sorted_by = "sorted by {}{}".format( (sort or sort_desc), " descending" if sort_desc else "" ) human_description_en = " ".join( [b for b in [human_description_en, sorted_by] if b] ) async def extra_template(): nonlocal sort display_columns, display_rows = await self.display_columns_and_rows( database, table, results.description, rows, link_column=not is_view, truncate_cells=self.ds.config("truncate_cells_html"), ) metadata = ( (self.ds.metadata("databases") or {}) .get(database, {}) .get("tables", {}) .get(table, {}) ) self.ds.update_with_inherited_metadata(metadata) form_hidden_args = [] for arg in ("_fts_table", "_fts_pk"): if arg in special_args: form_hidden_args.append((arg, special_args[arg])) if request.args.get("_where"): for where_text in request.args.getlist("_where"): form_hidden_args.append(("_where", where_text)) # if no sort specified AND table has a single primary key, # set sort to that so arrow is displayed if not sort and not sort_desc: if 1 == len(pks): sort = pks[0] elif use_rowid: sort = "rowid" return { "supports_search": bool(fts_table), "search": search or "", "use_rowid": use_rowid, "filters": filters, "display_columns": display_columns, "filter_columns": columns, "display_rows": display_rows, "facets_timed_out": facets_timed_out, "sorted_facet_results": sorted( facet_results.values(), key=lambda f: (len(f["results"]), f["name"]), reverse=True, ), "extra_wheres_for_ui": extra_wheres_for_ui, "form_hidden_args": form_hidden_args, "is_sortable": any(c["sortable"] for c in display_columns), "path_with_replaced_args": path_with_replaced_args, "path_with_removed_args": path_with_removed_args, "append_querystring": append_querystring, "request": request, "sort": sort, "sort_desc": sort_desc, "disable_sort": 
is_view, "custom_table_templates": [ "_table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), "_table-table-{}-{}.html".format( to_css_class(database), to_css_class(table) ), "_table.html", ], "metadata": metadata, "view_definition": await db.get_view_definition(table), "table_definition": await db.get_table_definition(table), } return ( { "database": database, "table": table, "is_view": is_view, "human_description_en": human_description_en, "rows": rows[:page_size], "truncated": results.truncated, "filtered_table_rows_count": filtered_table_rows_count, "expanded_columns": expanded_columns, "expandable_columns": expandable_columns, "columns": columns, "primary_keys": pks, "units": units, "query": {"sql": sql, "params": params}, "facet_results": facet_results, "suggested_facets": suggested_facets, "next": next_value and str(next_value) or None, "next_url": next_url, "private": private, "allow_execute_sql": await self.ds.permission_allowed( request.actor, "execute-sql", database, default=True ), }, extra_template, ( "table-{}-{}.html".format(to_css_class(database), to_css_class(table)), "table.html", ), )
def test_path_with_added_args(path, added_args, expected):
    """Assert that ``utils.path_with_added_args`` yields ``expected``.

    A GET ``Request`` is built from the UTF-8 encoded ``path`` and handed,
    together with ``added_args``, to the helper under test.
    """
    encoded_path = path.encode("utf8")
    request = Request(encoded_path, {}, "1.1", "GET", None)
    assert expected == utils.path_with_added_args(request, added_args)
async def view_get(self, request, name, hash, **kwargs):
    """Handle a GET request and return CSV, JSON or rendered HTML.

    The output format is taken from ``?_format=``; failing that from the
    ``as_format`` keyword argument (leading dots stripped); failing that from
    the extension returned by ``resolve_table_and_format``.  CSV is delegated
    to ``self.as_csv``.  Everything else calls ``self.data`` and then either
    serialises the result as JSON (honouring ``_json``, ``_json_infinity``
    and ``_shape``) or renders the returned templates.

    Args:
        request: Incoming request; ``args`` and ``path`` are read from it.
        name: Passed to ``self.data`` / ``self.as_csv`` and reported as
            ``"database"`` in ``_shape=object`` error payloads.
        hash: Passed through alongside ``name``; reported as
            ``"database_hash"`` in ``_shape=object`` error payloads.
        **kwargs: Forwarded to ``self.data`` / ``self.as_csv``.
            ``as_format`` is popped when no ``?_format=`` is supplied and
            ``table_and_format`` is replaced by a resolved ``table`` entry.

    Returns:
        The response from ``self.as_csv``, ``self.data`` or
        ``self.redirect`` when one of those short-circuits, otherwise the
        JSON or HTML response built here.

    Raises:
        DatasetteError: With status 400 when ``self.data`` raises
            ``InterruptedError``, ``sqlite3.OperationalError`` or
            ``InvalidSql``.  A ``DatasetteError`` raised by ``self.data``
            itself propagates unchanged.
    """
    # If ?_format= is provided, use that as the format
    _format = request.args.get("_format", None)
    if not _format:
        _format = (kwargs.pop("as_format", None) or "").lstrip(".")
    if "table_and_format" in kwargs:
        table, _ext_format = resolve_table_and_format(
            table_and_format=urllib.parse.unquote_plus(kwargs["table_and_format"]),
            table_exists=lambda t: self.ds.table_exists(name, t),
        )
        _format = _format or _ext_format
        kwargs["table"] = table
        del kwargs["table_and_format"]
    elif "table" in kwargs:
        kwargs["table"] = urllib.parse.unquote_plus(kwargs["table"])

    if _format == "csv":
        return await self.as_csv(request, name, hash, **kwargs)

    # _format can only be None here via the table_and_format branch, when
    # neither ?_format=/as_format nor the resolved extension supplied one;
    # in every other path it is a (possibly empty) string.
    if _format is None:
        # HTML views default to expanding all foreign key labels
        kwargs["default_labels"] = True

    extra_template_data = {}
    start = time.time()
    status_code = 200
    templates = []
    try:
        response_or_template_contexts = await self.data(request, name, hash, **kwargs)
        if isinstance(response_or_template_contexts, response.HTTPResponse):
            return response_or_template_contexts
        data, extra_template_data, templates = response_or_template_contexts
    except InterruptedError:
        # NOTE(review): the keyword is spelled "messagge_is_html" exactly as
        # the original call site has it; confirm against DatasetteError's
        # signature before renaming.
        raise DatasetteError(
            """ SQL query took too long. The time limit is controlled by the <a href="https://datasette.readthedocs.io/en/stable/config.html#sql-time-limit-ms">sql_time_limit_ms</a> configuration option. 
            """,
            title="SQL Interrupted",
            status=400,
            messagge_is_html=True,
        )
    except (sqlite3.OperationalError, InvalidSql) as e:
        # This clause already covers sqlite3.OperationalError, so no separate
        # handler for it is needed; any other exception (including
        # DatasetteError) propagates to the caller untouched.
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    end = time.time()
    data["query_ms"] = (end - start) * 1000
    # Copy instance-level metadata into the payload when it has a truthy value.
    for key in ("source", "source_url", "license", "license_url"):
        value = self.ds.metadata.get(key)
        if value:
            data[key] = value

    if _format in ("json", "jsono"):
        # Special case for .jsono extension - redirect to _shape=objects
        if _format == "jsono":
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {"_shape": "objects"},
                    path=request.path.rsplit(".jsono", 1)[0] + ".json",
                ),
                forward_querystring=False,
            )

        # Handle the _json= parameter which may modify data["rows"]
        json_cols = []
        if "_json" in request.args:
            json_cols = request.args["_json"]
        if json_cols and "rows" in data and "columns" in data:
            data["rows"] = convert_specific_columns_to_json(
                data["rows"], data["columns"], json_cols,
            )

        # unless _json_infinity=1 requested, replace infinity with None
        if "rows" in data and not value_as_boolean(
            request.args.get("_json_infinity", "0")
        ):
            data["rows"] = [remove_infinites(row) for row in data["rows"]]

        # Deal with the _shape option
        shape = request.args.get("_shape", "arrays")
        if shape == "arrayfirst":
            data = [row[0] for row in data["rows"]]
        elif shape in ("objects", "object", "array"):
            columns = data.get("columns")
            rows = data.get("rows")
            if rows and columns:
                data["rows"] = [dict(zip(columns, row)) for row in rows]
            if shape == "object":
                error = None
                if "primary_keys" not in data:
                    error = "_shape=object is only available on tables"
                else:
                    pks = data["primary_keys"]
                    if not pks:
                        error = "_shape=object not available for tables with no primary keys"
                    else:
                        # Key each row by the string built from its primary keys.
                        object_rows = {}
                        for row in data["rows"]:
                            pk_string = path_from_row_pks(row, pks, not pks)
                            object_rows[pk_string] = row
                        data = object_rows
                if error:
                    # The error payload replaces the data; status_code is
                    # left as it was.
                    data = {
                        "ok": False,
                        "error": error,
                        "database": name,
                        "database_hash": hash,
                    }
            elif shape == "array":
                data = data["rows"]
        elif shape == "arrays":
            pass
        else:
            status_code = 400
            data = {
                "ok": False,
                "error": "Invalid _shape: {}".format(shape),
                "status": 400,
                "title": None,
            }

        headers = {}
        if self.ds.cors:
            headers["Access-Control-Allow-Origin"] = "*"
        r = response.HTTPResponse(
            json.dumps(data, cls=CustomJSONEncoder),
            status=status_code,
            content_type="application/json",
            headers=headers,
        )
    else:
        # extra_template_data may be a plain mapping, a callable returning
        # one, or a callable returning a coroutine that resolves to one.
        if callable(extra_template_data):
            extras = extra_template_data()
            if asyncio.iscoroutine(extras):
                extras = await extras
        else:
            extras = extra_template_data

        url_labels_extra = {}
        if data.get("expandable_columns"):
            url_labels_extra = {"_labels": "on"}
        url_csv_args = {"_size": "max", **url_labels_extra}
        url_csv = path_with_format(request, "csv", url_csv_args)
        url_csv_path = url_csv.split('?')[0]
        # Later entries win: extras override data, and the keys built here
        # override both.
        context = {
            **data,
            **extras,
            **{
                "url_json": path_with_format(request, "json", {
                    **url_labels_extra,
                }),
                "url_csv": url_csv,
                "url_csv_path": url_csv_path,
                "url_csv_args": url_csv_args,
                "extra_css_urls": self.ds.extra_css_urls(),
                "extra_js_urls": self.ds.extra_js_urls(),
                "datasette_version": __version__,
                "config": self.ds.config_dict(),
            }
        }
        if "metadata" not in context:
            context["metadata"] = self.ds.metadata
        r = self.render(templates, **context)
        r.status = status_code

    # Set far-future cache expiry
    if self.ds.cache_headers:
        ttl = request.args.get("_ttl", None)
        if ttl is None or not ttl.isdigit():
            ttl = self.ds.config("default_cache_ttl")
        else:
            ttl = int(ttl)
        if ttl == 0:
            ttl_header = 'no-cache'
        else:
            ttl_header = 'max-age={}'.format(ttl)
        r.headers["Cache-Control"] = ttl_header
    r.headers["Referrer-Policy"] = "no-referrer"
    return r
async def facet_results(self):
    """Run the array-facet query for every configured column.

    For each entry from ``self.get_configs()`` a SQL statement is built
    around ``self.sql`` and executed with ``self.params``.  Each returned
    row becomes a value entry carrying a URL that toggles the matching
    ``<column>__arraycontains`` query-string argument.

    Returns:
        A ``(facets, timed_out)`` tuple: ``facets`` is a list with one dict
        per column whose query completed, ``timed_out`` lists the columns
        whose query raised ``QueryInterrupted``.
    """
    facets = []
    timed_out = []
    max_values = self.get_facet_size()

    for entry in self.get_configs():
        settings, origin = entry["config"], entry["source"]
        column = settings.get("column") or settings["simple"]
        # https://github.com/simonw/datasette/issues/448
        sql_template = """ with inner as ({sql}), deduped_array_items as ( select distinct j.value, inner.* from json_each([inner].{col}) j join inner ) select value as value, count(*) as count from deduped_array_items group by value order by count(*) desc, value limit {limit} """
        # One row more than the limit is requested so truncation can be
        # detected below.
        facet_sql = sql_template.format(
            col=escape_sqlite(column), sql=self.sql, limit=max_values + 1
        )
        try:
            executed = await self.ds.execute(
                self.database,
                facet_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.setting("facet_time_limit_ms"),
            )
            values = []
            # The facet dict is registered before its value list is filled;
            # ``values`` is the same list object referenced under "results".
            facets.append(
                {
                    "name": column,
                    "type": self.type,
                    "results": values,
                    "hideable": origin != "metadata",
                    "toggle_url": self.ds.urls.path(
                        path_with_removed_args(
                            self.request, {"_facet_array": column}
                        )
                    ),
                    "truncated": len(executed) > max_values,
                }
            )
            visible_rows = executed.rows[:max_values]
            pairs = self.get_querystring_pairs()
            for row in visible_rows:
                value = str(row["value"])
                filter_key = f"{column}__arraycontains"
                selected = (filter_key, value) in pairs
                # Already-selected values link to removing the filter,
                # everything else links to adding it.
                rewrite_args = (
                    path_with_removed_args if selected else path_with_added_args
                )
                toggle_path = rewrite_args(self.request, {filter_key: value})
                values.append(
                    {
                        "value": value,
                        "label": value,
                        "count": row["count"],
                        "toggle_url": self.ds.absolute_url(
                            self.request, toggle_path
                        ),
                        "selected": selected,
                    }
                )
        except QueryInterrupted:
            timed_out.append(column)

    return facets, timed_out
async def facet_results(self):
    """Run the array-facet query for every configured column.

    For each entry from ``self.get_configs()`` a SQL statement joining
    ``self.sql`` with ``json_each`` over the column is executed with
    ``self.params``.  Each returned row becomes a value entry carrying a
    URL that toggles the matching ``<column>__arraycontains`` argument.

    Returns:
        A ``(facets, timed_out)`` tuple: ``facets`` maps each column whose
        query completed to its facet dict, ``timed_out`` lists the columns
        whose query raised ``InterruptedError``.
    """
    facets_by_column = {}
    timed_out = []
    max_values = self.ds.config("default_facet_size")

    for entry in self.get_configs():
        settings, origin = entry["config"], entry["source"]
        column = settings.get("column") or settings["simple"]
        sql_template = """ select j.value as value, count(*) as count from ( {sql} ) join json_each({col}) j group by j.value order by count desc limit {limit} """
        # One row more than the limit is requested so truncation can be
        # detected below.
        facet_sql = sql_template.format(
            col=escape_sqlite(column), sql=self.sql, limit=max_values + 1
        )
        try:
            executed = await self.ds.execute(
                self.database,
                facet_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.config("facet_time_limit_ms"),
            )
            values = []
            # The facet dict is stored before its value list is filled;
            # ``values`` is the same list object referenced under "results".
            facets_by_column[column] = {
                "name": column,
                "type": self.type,
                "results": values,
                "hideable": origin != "metadata",
                "toggle_url": path_with_removed_args(
                    self.request, {"_facet_array": column}
                ),
                "truncated": len(executed) > max_values,
            }
            visible_rows = executed.rows[:max_values]
            pairs = self.get_querystring_pairs()
            for row in visible_rows:
                value = str(row["value"])
                filter_key = "{}__arraycontains".format(column)
                selected = (filter_key, value) in pairs
                # Already-selected values link to removing the filter,
                # everything else links to adding it.
                rewrite_args = (
                    path_with_removed_args if selected else path_with_added_args
                )
                toggle_path = rewrite_args(self.request, {filter_key: value})
                values.append(
                    {
                        "value": value,
                        "label": value,
                        "count": row["count"],
                        "toggle_url": self.ds.absolute_url(
                            self.request, toggle_path
                        ),
                        "selected": selected,
                    }
                )
        except InterruptedError:
            timed_out.append(column)

    return facets_by_column, timed_out
async def get(self, request):
    """Handle a GET request and return CSV, a renderer's output or HTML.

    The database is looked up from ``request.url_vars["database"]`` and the
    output format from ``request.url_vars["format"]``.  CSV is delegated to
    ``self.as_csv``; ``jsono`` redirects to the ``.json`` path with
    ``_shape=objects``; a format registered in ``self.ds.renderers`` is
    dispatched to that renderer; anything else renders the templates
    returned by ``self.data``.

    Args:
        request: Incoming request; ``url_vars``, ``args``, ``path`` and
            ``query_string`` are read from it.

    Returns:
        The response from ``self.as_csv``, ``self.data`` or
        ``self.redirect`` when one of those short-circuits, otherwise the
        result of ``self.set_response_headers`` applied to the renderer or
        template response.

    Raises:
        NotFound: If the database route is unknown or a renderer returns
            ``None``.
        DatasetteError: With status 400 when ``self.data`` raises
            ``QueryInterrupted``, ``sqlite3.OperationalError`` or
            ``InvalidSql``.  A ``DatasetteError`` raised by ``self.data``
            itself propagates unchanged.
        AssertionError: If a renderer returns something that is neither a
            ``dict`` nor a ``Response``.
    """
    database_route = tilde_decode(request.url_vars["database"])
    try:
        db = self.ds.get_database(route=database_route)
    except KeyError:
        raise NotFound("Database not found: {}".format(database_route))
    database = db.name

    _format = request.url_vars["format"]
    data_kwargs = {}

    if _format == "csv":
        return await self.as_csv(request, database_route)

    if _format is None:
        # HTML views default to expanding all foreign key labels
        data_kwargs["default_labels"] = True

    extra_template_data = {}
    start = time.perf_counter()
    status_code = None
    templates = []
    try:
        response_or_template_contexts = await self.data(request, **data_kwargs)
        if isinstance(response_or_template_contexts, Response):
            return response_or_template_contexts
        # If it has four items, it includes an HTTP status code
        if len(response_or_template_contexts) == 4:
            (
                data,
                extra_template_data,
                templates,
                status_code,
            ) = response_or_template_contexts
        else:
            data, extra_template_data, templates = response_or_template_contexts
    except QueryInterrupted:
        raise DatasetteError(
            """ SQL query took too long. The time limit is controlled by the <a href="https://docs.datasette.io/en/stable/settings.html#sql-time-limit-ms">sql_time_limit_ms</a> configuration option. 
            """,
            title="SQL Interrupted",
            status=400,
            message_is_html=True,
        )
    except (sqlite3.OperationalError, InvalidSql) as e:
        # This clause already covers sqlite3.OperationalError, so no separate
        # handler for it is needed; any other exception (including
        # DatasetteError) propagates to the caller untouched.
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    end = time.perf_counter()
    data["query_ms"] = (end - start) * 1000
    # Copy instance-level metadata into the payload when it has a truthy value.
    for key in ("source", "source_url", "license", "license_url"):
        value = self.ds.metadata(key)
        if value:
            data[key] = value

    # Special case for .jsono extension - redirect to _shape=objects
    if _format == "jsono":
        return self.redirect(
            request,
            path_with_added_args(
                request,
                {"_shape": "objects"},
                path=request.path.rsplit(".jsono", 1)[0] + ".json",
            ),
            forward_querystring=False,
        )

    if _format in self.ds.renderers.keys():
        # Dispatch request to the correct output format renderer
        # (CSV is not handled here due to streaming)
        result = call_with_supported_arguments(
            self.ds.renderers[_format][0],
            datasette=self.ds,
            columns=data.get("columns") or [],
            rows=data.get("rows") or [],
            sql=data.get("query", {}).get("sql", None),
            query_name=data.get("query_name"),
            database=database,
            table=data.get("table"),
            request=request,
            view_name=self.name,
            # These will be deprecated in Datasette 1.0:
            args=request.args,
            data=data,
        )
        if asyncio.iscoroutine(result):
            result = await result
        if result is None:
            raise NotFound("No data")
        if isinstance(result, dict):
            r = Response(
                body=result.get("body"),
                status=result.get("status_code", status_code or 200),
                content_type=result.get("content_type", "text/plain"),
                headers=result.get("headers"),
            )
        elif isinstance(result, Response):
            r = result
            if status_code is not None:
                # Over-ride the status code
                r.status = status_code
        else:
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped under ``python -O`` (which would leave ``r``
            # unbound further down).
            raise AssertionError(f"{result} should be dict or Response")
    else:
        # extra_template_data may be a plain mapping, a callable returning
        # one, or a callable returning a coroutine that resolves to one.
        if callable(extra_template_data):
            extras = extra_template_data()
            if asyncio.iscoroutine(extras):
                extras = await extras
        else:
            extras = extra_template_data

        url_labels_extra = {}
        if data.get("expandable_columns"):
            url_labels_extra = {"_labels": "on"}

        # Offer a link for every renderer whose can_render callback is truthy.
        renderers = {}
        for key, (_, can_render) in self.ds.renderers.items():
            it_can_render = call_with_supported_arguments(
                can_render,
                datasette=self.ds,
                columns=data.get("columns") or [],
                rows=data.get("rows") or [],
                sql=data.get("query", {}).get("sql", None),
                query_name=data.get("query_name"),
                database=database,
                table=data.get("table"),
                request=request,
                view_name=self.name,
            )
            it_can_render = await await_me_maybe(it_can_render)
            if it_can_render:
                renderers[key] = self.ds.urls.path(
                    path_with_format(
                        request=request, format=key, extra_qs={**url_labels_extra}
                    )
                )

        url_csv_args = {"_size": "max", **url_labels_extra}
        url_csv = self.ds.urls.path(
            path_with_format(request=request, format="csv", extra_qs=url_csv_args)
        )
        url_csv_path = url_csv.split("?")[0]
        # Later entries win: extras override data, and the keys built here
        # override both.
        context = {
            **data,
            **extras,
            **{
                "renderers": renderers,
                "url_csv": url_csv,
                "url_csv_path": url_csv_path,
                # Current query-string pairs minus _labels/_facet/_size,
                # with _size=max appended.
                "url_csv_hidden_args": [
                    (key, value)
                    for key, value in urllib.parse.parse_qsl(request.query_string)
                    if key not in ("_labels", "_facet", "_size")
                ]
                + [("_size", "max")],
                "datasette_version": __version__,
                "settings": self.ds.settings_dict(),
            },
        }
        if "metadata" not in context:
            # NOTE(review): self.ds.metadata is called as a function above,
            # so this stores the callable itself, not its result - confirm
            # that is what the templates expect.
            context["metadata"] = self.ds.metadata
        r = await self.render(templates, request=request, context=context)
        if status_code is not None:
            r.status = status_code

    # Fall back to the configured default unless ?_ttl= is all digits.
    ttl = request.args.get("_ttl", None)
    if ttl is None or not ttl.isdigit():
        ttl = self.ds.setting("default_cache_ttl")
    return self.set_response_headers(r, ttl)
async def view_get(self, request, name, hash, **kwargs):
    """Handle a GET request and return JSON or rendered HTML.

    ``self.data`` is called with the remaining keyword arguments.  When the
    ``as_json`` keyword argument is truthy the result is serialised as JSON
    (honouring ``_json`` and ``_shape``; ``".jsono"`` redirects to the
    ``.json`` path with ``_shape=objects``); otherwise the returned
    templates are rendered.

    Args:
        request: Incoming request; ``args`` and ``path`` are read from it.
        name: Passed to ``self.data`` and reported as ``"database"`` in
            ``_shape=object`` error payloads.
        hash: Passed through alongside ``name``; reported as
            ``"database_hash"`` in ``_shape=object`` error payloads.
        **kwargs: ``as_json`` is consumed here; everything else is
            forwarded to ``self.data``.

    Returns:
        The response from ``self.data`` or ``self.redirect`` when one of
        those short-circuits, otherwise the JSON or HTML response built
        here.

    Raises:
        DatasetteError: With status 400 when ``self.data`` raises
            ``InterruptedError``, ``sqlite3.OperationalError`` or
            ``InvalidSql``.  A ``DatasetteError`` raised by ``self.data``
            itself propagates unchanged.
    """
    # Popped so that it is not forwarded to self.data(); defaults to False.
    as_json = kwargs.pop("as_json", False)

    extra_template_data = {}
    start = time.time()
    status_code = 200
    templates = []
    try:
        response_or_template_contexts = await self.data(
            request, name, hash, **kwargs
        )
        if isinstance(response_or_template_contexts, response.HTTPResponse):
            return response_or_template_contexts
        data, extra_template_data, templates = response_or_template_contexts
    except InterruptedError:
        # NOTE(review): the keyword is spelled "messagge_is_html" exactly as
        # the original call site has it; confirm against DatasetteError's
        # signature before renaming.
        raise DatasetteError(
            """ SQL query took too long. The time limit is controlled by the <a href="https://datasette.readthedocs.io/en/stable/config.html#sql-time-limit-ms">sql_time_limit_ms</a> configuration option. """,
            title="SQL Interrupted",
            status=400,
            messagge_is_html=True,
        )
    except (sqlite3.OperationalError, InvalidSql) as e:
        # This clause already covers sqlite3.OperationalError, so no separate
        # handler for it is needed; any other exception (including
        # DatasetteError) propagates to the caller untouched.
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    end = time.time()
    data["query_ms"] = (end - start) * 1000
    # Copy instance-level metadata into the payload when it has a truthy value.
    for key in ("source", "source_url", "license", "license_url"):
        value = self.ds.metadata.get(key)
        if value:
            data[key] = value

    if as_json:
        # Special case for .jsono extension - redirect to _shape=objects
        if as_json == ".jsono":
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {"_shape": "objects"},
                    path=request.path.rsplit(".jsono", 1)[0] + ".json",
                ),
                forward_querystring=False,
            )

        # Handle the _json= parameter which may modify data["rows"]
        json_cols = []
        if "_json" in request.args:
            json_cols = request.args["_json"]
        if json_cols and "rows" in data and "columns" in data:
            data["rows"] = convert_specific_columns_to_json(
                data["rows"], data["columns"], json_cols,
            )

        # Deal with the _shape option
        shape = request.args.get("_shape", "arrays")
        if shape == "arrayfirst":
            data = [row[0] for row in data["rows"]]
        elif shape in ("objects", "object", "array"):
            columns = data.get("columns")
            rows = data.get("rows")
            if rows and columns:
                data["rows"] = [dict(zip(columns, row)) for row in rows]
            if shape == "object":
                error = None
                if "primary_keys" not in data:
                    error = "_shape=object is only available on tables"
                else:
                    pks = data["primary_keys"]
                    if not pks:
                        error = "_shape=object not available for tables with no primary keys"
                    else:
                        # Key each row by the string built from its primary keys.
                        object_rows = {}
                        for row in data["rows"]:
                            pk_string = path_from_row_pks(row, pks, not pks)
                            object_rows[pk_string] = row
                        data = object_rows
                if error:
                    # The error payload replaces the data; status_code is
                    # left as it was.
                    data = {
                        "ok": False,
                        "error": error,
                        "database": name,
                        "database_hash": hash,
                    }
            elif shape == "array":
                data = data["rows"]
        elif shape == "arrays":
            pass
        else:
            status_code = 400
            data = {
                "ok": False,
                "error": "Invalid _shape: {}".format(shape),
                "status": 400,
                "title": None,
            }

        headers = {}
        if self.ds.cors:
            headers["Access-Control-Allow-Origin"] = "*"
        r = response.HTTPResponse(
            json.dumps(data, cls=CustomJSONEncoder),
            status=status_code,
            content_type="application/json",
            headers=headers,
        )
    else:
        # extra_template_data may be a plain mapping, a callable returning
        # one, or a callable returning a coroutine that resolves to one.
        if callable(extra_template_data):
            extras = extra_template_data()
            if asyncio.iscoroutine(extras):
                extras = await extras
        else:
            extras = extra_template_data

        # Later entries win: extras override data, and the keys built here
        # override both.
        context = {
            **data,
            **extras,
            **{
                "url_json": path_with_ext(request, ".json"),
                "url_jsono": path_with_ext(request, ".jsono"),
                "extra_css_urls": self.ds.extra_css_urls(),
                "extra_js_urls": self.ds.extra_js_urls(),
                "datasette_version": __version__,
            }
        }
        if "metadata" not in context:
            context["metadata"] = self.ds.metadata
        r = self.render(templates, **context)
        r.status = status_code

    # Set far-future cache expiry
    if self.ds.cache_headers:
        ttl = request.args.get("_ttl", None)
        if ttl is None or not ttl.isdigit():
            ttl = self.ds.config["default_cache_ttl"]
        else:
            ttl = int(ttl)
        if ttl == 0:
            ttl_header = 'no-cache'
        else:
            ttl_header = 'max-age={}'.format(ttl)
        r.headers["Cache-Control"] = ttl_header
    r.headers["Referrer-Policy"] = "no-referrer"
    return r
async def data(self, request, name, hash, table, default_labels=False, _next=None, _size=None):
    """Build the data for a table or view page.

    Args:
        request: Incoming request; its query string, ``args``,
            ``raw_args`` and ``url`` are read.
        name: Database name.
        hash: Database hash, passed through to ``custom_sql`` and the
            template context.
        table: Table, view or canned query name.
        default_labels: Used for foreign key label expansion when
            ``?_labels=`` cannot be parsed as a boolean.
        _next: Pagination token; falls back to ``?_next=``.
        _size: Page size; falls back to ``?_size=``.

    Returns:
        A ``(data, extra_template, templates)`` tuple, where
        ``extra_template`` is a coroutine function producing extra
        template context. For canned queries and ``?_group_count=`` the
        result of ``self.custom_sql()`` is returned instead, and for
        ``?_filter_column`` / ``?_sort_by_desc`` the result of
        ``self.redirect()``.

    Raises:
        NotFound: If ``table`` is neither a view nor a known table.
        DatasetteError: For an invalid search column, sort column,
            ``_size`` or a disallowed ``_facet``.
    """
    canned_query = self.ds.get_canned_query(name, table)
    if canned_query is not None:
        return await self.custom_sql(
            request,
            name,
            hash,
            canned_query["sql"],
            editable=False,
            canned_query=table,
        )
    is_view = bool(await self.ds.get_view_definition(name, table))
    info = self.ds.inspect()
    table_info = info[name]["tables"].get(table) or {}
    if not is_view and not table_info:
        raise NotFound("Table not found: {}".format(table))
    pks = table_info.get("primary_keys") or []
    use_rowid = not pks and not is_view
    if use_rowid:
        select = "rowid, *"
        order_by = "rowid"
        order_by_pks = "rowid"
    else:
        select = "*"
        order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
        order_by = order_by_pks
    if is_view:
        order_by = ""
    # We roll our own query_string decoder because by default Sanic
    # drops anything with an empty value e.g. ?name__exact=
    args = RequestParameters(
        urllib.parse.parse_qs(request.query_string, keep_blank_values=True)
    )
    # Special args start with _ and do not contain a __
    # That's so if there is a column that starts with _
    # it can still be queried using ?_col__exact=blah
    special_args = {}
    special_args_lists = {}
    other_args = {}
    for key, value in args.items():
        if key.startswith("_") and "__" not in key:
            special_args[key] = value[0]
            special_args_lists[key] = value
        else:
            other_args[key] = value[0]
    # Handle ?_filter_column and redirect, if present
    redirect_params = filters_should_redirect(special_args)
    if redirect_params:
        return self.redirect(
            request,
            path_with_added_args(request, redirect_params),
            forward_querystring=False,
        )
    # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
    if "_sort_by_desc" in special_args:
        return self.redirect(
            request,
            path_with_added_args(
                request,
                {
                    "_sort_desc": special_args.get("_sort"),
                    "_sort_by_desc": None,
                    "_sort": None,
                },
            ),
            forward_querystring=False,
        )
    table_metadata = self.table_metadata(name, table)
    units = table_metadata.get("units", {})
    filters = Filters(sorted(other_args.items()), units, ureg)
    where_clauses, params = filters.build_where_clauses()
    # _search support:
    fts_table = info[name]["tables"].get(table, {}).get("fts_table")
    search_args = dict(
        pair for pair in special_args.items() if pair[0].startswith("_search")
    )
    search_descriptions = []
    search = ""
    if fts_table and search_args:
        if "_search" in search_args:
            # Simple ?_search=xxx
            search = search_args["_search"]
            where_clauses.append(
                "rowid in (select rowid from {fts_table} where {fts_table} match :search)".format(
                    fts_table=escape_sqlite(fts_table),
                )
            )
            search_descriptions.append('search matches "{}"'.format(search))
            params["search"] = search
        else:
            # More complex: search against specific columns
            valid_columns = set(info[name]["tables"][fts_table]["columns"])
            for i, (key, search_text) in enumerate(search_args.items()):
                # NOTE(review): a key such as "_searchfoo" starts with
                # "_search" but has no "_search_" prefix, so this index
                # raises IndexError - confirm the intended handling.
                search_col = key.split("_search_", 1)[1]
                if search_col not in valid_columns:
                    raise DatasetteError("Cannot search by that column", status=400)
                where_clauses.append(
                    "rowid in (select rowid from {fts_table} where {search_col} match :search_{i})".format(
                        fts_table=escape_sqlite(fts_table),
                        search_col=escape_sqlite(search_col),
                        i=i
                    )
                )
                search_descriptions.append(
                    'search column "{}" matches "{}"'.format(
                        search_col, search_text
                    )
                )
                params["search_{}".format(i)] = search_text
    table_rows_count = None
    sortable_columns = set()
    if not is_view:
        table_rows_count = table_info["count"]
        sortable_columns = self.sortable_columns_for_table(name, table, use_rowid)
    # Allow for custom sort order
    sort = special_args.get("_sort")
    if sort:
        if sort not in sortable_columns:
            raise DatasetteError("Cannot sort table by {}".format(sort))
        order_by = escape_sqlite(sort)
    sort_desc = special_args.get("_sort_desc")
    if sort_desc:
        if sort_desc not in sortable_columns:
            raise DatasetteError("Cannot sort table by {}".format(sort_desc))
        if sort:
            raise DatasetteError("Cannot use _sort and _sort_desc at the same time")
        order_by = "{} desc".format(escape_sqlite(sort_desc))
    from_sql = "from {table_name} {where}".format(
        table_name=escape_sqlite(table),
        where=(
            "where {} ".format(" and ".join(where_clauses))
        ) if where_clauses else "",
    )
    # Store current params and where_clauses for later:
    from_sql_params = dict(**params)
    from_sql_where_clauses = where_clauses[:]
    count_sql = "select count(*) {}".format(from_sql)
    _next = _next or special_args.get("_next")
    offset = ""
    if _next:
        if is_view:
            # _next is an offset
            offset = " offset {}".format(int(_next))
        else:
            components = urlsafe_components(_next)
            # If a sort order is applied, the first of these is the sort value
            if sort or sort_desc:
                sort_value = components[0]
                # Special case for if non-urlencoded first token was $null
                if _next.split(",")[0] == "$null":
                    sort_value = None
                components = components[1:]
            # Figure out the SQL for next-based-on-primary-key first
            next_by_pk_clauses = []
            if use_rowid:
                # The placeholder number is taken from len(params) before the
                # value is added, so the order of these two lines matters
                next_by_pk_clauses.append("rowid > :p{}".format(len(params)))
                params["p{}".format(len(params))] = components[0]
            else:
                # Apply the tie-breaker based on primary keys
                if len(components) == len(pks):
                    param_len = len(params)
                    next_by_pk_clauses.append(
                        compound_keys_after_sql(pks, param_len)
                    )
                    for i, pk_value in enumerate(components):
                        params["p{}".format(param_len + i)] = pk_value
            # Now add the sort SQL, which may incorporate next_by_pk_clauses
            if sort or sort_desc:
                if sort_value is None:
                    if sort_desc:
                        # Just items where column is null ordered by pk
                        where_clauses.append(
                            "({column} is null and {next_clauses})".format(
                                column=escape_sqlite(sort_desc),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            )
                        )
                    else:
                        where_clauses.append(
                            "({column} is not null or ({column} is null and {next_clauses}))".format(
                                column=escape_sqlite(sort),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            )
                        )
                else:
                    where_clauses.append(
                        "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format(
                            column=escape_sqlite(sort or sort_desc),
                            op=">" if sort else "<",
                            p=len(params),
                            extra_desc_only="" if sort else " or {column2} is null".format(
                                column2=escape_sqlite(sort or sort_desc)
                            ),
                            next_clauses=" and ".join(next_by_pk_clauses),
                        )
                    )
                    params["p{}".format(len(params))] = sort_value
                order_by = "{}, {}".format(order_by, order_by_pks)
            else:
                where_clauses.extend(next_by_pk_clauses)
    where_clause = ""
    if where_clauses:
        where_clause = "where {} ".format(" and ".join(where_clauses))
    if order_by:
        order_by = "order by {} ".format(order_by)
    # _group_count=col1&_group_count=col2
    group_count = special_args_lists.get("_group_count") or []
    if group_count:
        sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
            group_cols=", ".join(
                '"{}"'.format(group_count_col)
                for group_count_col in group_count
            ),
            table_name=escape_sqlite(table),
            where=where_clause,
        )
        return await self.custom_sql(request, name, hash, sql, editable=True)
    extra_args = {}
    # Handle ?_size=500
    page_size = _size or request.raw_args.get("_size")
    if page_size:
        if page_size == "max":
            page_size = self.max_returned_rows
        try:
            page_size = int(page_size)
            if page_size < 0:
                raise ValueError
        except ValueError:
            raise DatasetteError("_size must be a positive integer", status=400)
        if page_size > self.max_returned_rows:
            raise DatasetteError(
                "_size must be <= {}".format(self.max_returned_rows),
                status=400
            )
        extra_args["page_size"] = page_size
    else:
        page_size = self.page_size
    # One row more than the page size is requested so that the presence of
    # a following page can be detected
    sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
        select=select,
        table_name=escape_sqlite(table),
        where=where_clause,
        order_by=order_by,
        limit=page_size + 1,
        offset=offset,
    )
    if request.raw_args.get("_timelimit"):
        extra_args["custom_time_limit"] = int(request.raw_args["_timelimit"])
    results = await self.ds.execute(
        name, sql, params, truncate=True, **extra_args
    )
    # facets support
    facet_size = self.ds.config["default_facet_size"]
    metadata_facets = table_metadata.get("facets", [])
    facets = metadata_facets[:]
    if request.args.get("_facet") and not self.ds.config["allow_facet"]:
        raise DatasetteError("_facet= is not allowed", status=400)
    try:
        facets.extend(request.args["_facet"])
    except KeyError:
        pass
    facet_results = {}
    facets_timed_out = []
    for column in facets:
        # Facets are only calculated for the first page
        if _next:
            continue
        facet_sql = """ select {col} as value, count(*) as count {from_sql} {and_or_where} {col} is not null group by {col} order by count desc limit {limit} """.format(
            col=escape_sqlite(column),
            from_sql=from_sql,
            and_or_where='and' if from_sql_where_clauses else 'where',
            limit=facet_size+1,
        )
        try:
            facet_rows_results = await self.ds.execute(
                name, facet_sql, params,
                truncate=False,
                custom_time_limit=self.ds.config["facet_time_limit_ms"],
            )
            facet_results_values = []
            facet_results[column] = {
                "name": column,
                "results": facet_results_values,
                "truncated": len(facet_rows_results) > facet_size,
            }
            facet_rows = facet_rows_results.rows[:facet_size]
            # Attempt to expand foreign keys into labels
            values = [row["value"] for row in facet_rows]
            expanded = (await self.expand_foreign_keys(
                name, table, column, values
            ))
            for row in facet_rows:
                selected = str(other_args.get(column)) == str(row["value"])
                if selected:
                    toggle_path = path_with_removed_args(
                        request, {column: str(row["value"])}
                    )
                else:
                    toggle_path = path_with_added_args(
                        request, {column: row["value"]}
                    )
                facet_results_values.append({
                    "value": row["value"],
                    "label": expanded.get(
                        (column, row["value"]),
                        row["value"]
                    ),
                    "count": row["count"],
                    "toggle_url": urllib.parse.urljoin(
                        request.url, toggle_path
                    ),
                    "selected": selected,
                })
        except InterruptedError:
            facets_timed_out.append(column)
    columns = [r[0] for r in results.description]
    rows = list(results.rows)
    filter_columns = columns[:]
    if use_rowid and filter_columns[0] == "rowid":
        filter_columns = filter_columns[1:]
    # Expand labeled columns if requested
    expanded_columns = []
    expandable_columns = self.expandable_columns(name, table)
    columns_to_expand = None
    try:
        all_labels = value_as_boolean(special_args.get("_labels", ""))
    except ValueError:
        all_labels = default_labels
    # Check for explicit _label=
    if "_label" in request.args:
        columns_to_expand = request.args["_label"]
    if columns_to_expand is None and all_labels:
        # expand all columns with foreign keys
        columns_to_expand = [
            fk["column"] for fk, _ in expandable_columns
        ]
    if columns_to_expand:
        expanded_labels = {}
        for fk, _ in expandable_columns:
            column = fk["column"]
            if column not in columns_to_expand:
                continue
            expanded_columns.append(column)
            # Gather the values
            column_index = columns.index(column)
            values = [row[column_index] for row in rows]
            # Expand them
            expanded_labels.update(await self.expand_foreign_keys(
                name, table, column, values
            ))
        if expanded_labels:
            # Rewrite the rows
            new_rows = []
            for row in rows:
                new_row = CustomRow(columns)
                for column in row.keys():
                    value = row[column]
                    if (column, value) in expanded_labels:
                        new_row[column] = {
                            'value': value,
                            'label': expanded_labels[(column, value)]
                        }
                    else:
                        new_row[column] = value
                new_rows.append(new_row)
            rows = new_rows
    # Pagination next link
    next_value = None
    next_url = None
    if len(rows) > page_size and page_size > 0:
        # rows holds page_size + 1 entries here, so rows[-2] is the last row
        # that will actually be displayed
        if is_view:
            next_value = int(_next or 0) + page_size
        else:
            next_value = path_from_row_pks(rows[-2], pks, use_rowid)
        # If there's a sort or sort_desc, add that value as a prefix
        if (sort or sort_desc) and not is_view:
            prefix = rows[-2][sort or sort_desc]
            if prefix is None:
                prefix = "$null"
            else:
                prefix = urllib.parse.quote_plus(str(prefix))
            next_value = "{},{}".format(prefix, next_value)
            added_args = {"_next": next_value}
            if sort:
                added_args["_sort"] = sort
            else:
                added_args["_sort_desc"] = sort_desc
        else:
            added_args = {"_next": next_value}
        next_url = urllib.parse.urljoin(
            request.url, path_with_replaced_args(request, added_args)
        )
        rows = rows[:page_size]
    # Number of filtered rows in whole set:
    filtered_table_rows_count = None
    if count_sql:
        try:
            count_rows = list(await self.ds.execute(
                name, count_sql, from_sql_params
            ))
            filtered_table_rows_count = count_rows[0][0]
        except InterruptedError:
            # Count timed out: filtered_table_rows_count stays None
            pass
    # Detect suggested facets (first page only)
    suggested_facets = []
    if (
        self.ds.config["suggest_facets"]
        and self.ds.config["allow_facet"]
        and not _next
    ):
        for facet_column in columns:
            if facet_column in facets:
                continue
            suggested_facet_sql = ''' select distinct {column} {from_sql} {and_or_where} {column} is not null limit {limit} '''.format(
                column=escape_sqlite(facet_column),
                from_sql=from_sql,
                and_or_where='and' if from_sql_where_clauses else 'where',
                limit=facet_size+1
            )
            try:
                distinct_values = await self.ds.execute(
                    name, suggested_facet_sql, from_sql_params,
                    truncate=False,
                    custom_time_limit=self.ds.config["facet_suggest_time_limit_ms"],
                )
                num_distinct_values = len(distinct_values)
                # The row count is None when the count query above was
                # interrupted; comparing an int with None raises TypeError,
                # so the "fewer values than rows" test is skipped then.
                if (
                    1 < num_distinct_values <= facet_size
                    and (
                        filtered_table_rows_count is None
                        or num_distinct_values < filtered_table_rows_count
                    )
                ):
                    suggested_facets.append({
                        'name': facet_column,
                        'toggle_url': path_with_added_args(
                            request, {'_facet': facet_column}
                        ),
                    })
            except InterruptedError:
                pass
    # human_description_en combines filters AND search, if provided
    human_description_en = filters.human_description_en(extra=search_descriptions)
    if sort or sort_desc:
        sorted_by = "sorted by {}{}".format(
            (sort or sort_desc),
            " descending" if sort_desc else "",
        )
        human_description_en = " ".join(
            [b for b in [human_description_en, sorted_by] if b]
        )

    async def extra_template():
        """Return the extra context needed only for HTML rendering."""
        display_columns, display_rows = await self.display_columns_and_rows(
            name,
            table,
            results.description,
            rows,
            link_column=not is_view,
        )
        metadata = self.ds.metadata.get("databases", {}).get(name, {}).get(
            "tables", {}
        ).get(
            table, {}
        )
        self.ds.update_with_inherited_metadata(metadata)
        return {
            "database_hash": hash,
            "supports_search": bool(fts_table),
            "search": search or "",
            "use_rowid": use_rowid,
            "filters": filters,
            "display_columns": display_columns,
            "filter_columns": filter_columns,
            "display_rows": display_rows,
            "facets_timed_out": facets_timed_out,
            "sorted_facet_results": sorted(
                facet_results.values(),
                key=lambda f: (len(f["results"]), f["name"]),
                reverse=True
            ),
            "facet_hideable": lambda facet: facet not in metadata_facets,
            "is_sortable": any(c["sortable"] for c in display_columns),
            "path_with_replaced_args": path_with_replaced_args,
            "path_with_removed_args": path_with_removed_args,
            "append_querystring": append_querystring,
            "request": request,
            "sort": sort,
            "sort_desc": sort_desc,
            "disable_sort": is_view,
            "custom_rows_and_columns_templates": [
                "_rows_and_columns-{}-{}.html".format(
                    to_css_class(name), to_css_class(table)
                ),
                "_rows_and_columns-table-{}-{}.html".format(
                    to_css_class(name), to_css_class(table)
                ),
                "_rows_and_columns.html",
            ],
            "metadata": metadata,
            "view_definition": await self.ds.get_view_definition(name, table),
            "table_definition": await self.ds.get_table_definition(name, table),
        }

    return {
        "database": name,
        "table": table,
        "is_view": is_view,
        "human_description_en": human_description_en,
        "rows": rows[:page_size],
        "truncated": results.truncated,
        "table_rows_count": table_rows_count,
        "filtered_table_rows_count": filtered_table_rows_count,
        "expanded_columns": expanded_columns,
        "expandable_columns": expandable_columns,
        "columns": columns,
        "primary_keys": pks,
        "units": units,
        "query": {"sql": sql, "params": params},
        "facet_results": facet_results,
        "suggested_facets": suggested_facets,
        "next": next_value and str(next_value) or None,
        "next_url": next_url,
    }, extra_template, (
        "table-{}-{}.html".format(to_css_class(name), to_css_class(table)),
        "table.html",
    )
async def _data_traced(
    self,
    request,
    default_labels=False,
    _next=None,
    _size=None,
):
    """Build the data for a table or view page.

    The database route and table name are read from ``request.url_vars``.
    A matching canned query is delegated to ``QueryView``; otherwise the
    method checks permissions, builds the filtered/sorted/paginated SQL,
    runs it, runs facets and suggested facets, and expands foreign key
    labels.

    Args:
        request: Incoming request; ``url_vars``, ``args`` and ``actor``
            are read.
        default_labels: Used for label expansion when ``?_labels=``
            cannot be parsed as a boolean.
        _next: Pagination token; falls back to ``?_next=``.
        _size: Page size; falls back to ``?_size=`` and then to the
            table metadata ``size`` value.

    Returns:
        A ``(data, extra_template, templates)`` tuple, where
        ``extra_template`` is a coroutine function producing extra
        template context. The result of ``QueryView.data()`` or
        ``self.redirect()`` is returned instead on the canned query and
        redirect paths.

    Raises:
        NotFound: If the database or the table/view does not exist.
        DatasetteError: For an invalid or conflicting sort.
        BadRequest: For an invalid ``_size`` or a disallowed ``_facet``.
    """
    database_route = tilde_decode(request.url_vars["database"])
    table_name = tilde_decode(request.url_vars["table"])
    try:
        db = self.ds.get_database(route=database_route)
    except KeyError:
        raise NotFound("Database not found: {}".format(database_route))
    database_name = db.name

    # For performance profiling purposes, ?_noparallel=1 turns off asyncio.gather
    async def _gather_parallel(*args):
        return await asyncio.gather(*args)

    async def _gather_sequential(*args):
        # Await each awaitable in turn, collecting results in order
        results = []
        for fn in args:
            results.append(await fn)
        return results

    gather = (_gather_sequential
              if request.args.get("_noparallel") else _gather_parallel)

    # If this is a canned query, not a table, then dispatch to QueryView instead
    canned_query = await self.ds.get_canned_query(database_name, table_name,
                                                  request.actor)
    if canned_query:
        return await QueryView(self.ds).data(
            request,
            canned_query["sql"],
            metadata=canned_query,
            editable=False,
            canned_query=table_name,
            named_parameters=canned_query.get("params"),
            write=bool(canned_query.get("write")),
        )

    is_view, table_exists = map(
        bool,
        await gather(db.get_view_definition(table_name),
                     db.table_exists(table_name)),
    )

    # If table or view not found, return 404
    if not is_view and not table_exists:
        raise NotFound(f"Table not found: {table_name}")

    # Ensure user has permission to view this table
    await self.ds.ensure_permissions(
        request.actor,
        [
            ("view-table", (database_name, table_name)),
            ("view-database", database_name),
            "view-instance",
        ],
    )

    # private is True when an actor of None is not allowed to view the table
    private = not await self.ds.permission_allowed(
        None, "view-table", (database_name, table_name), default=True)

    # Handle ?_filter_column and redirect, if present
    redirect_params = filters_should_redirect(request.args)
    if redirect_params:
        return self.redirect(
            request,
            path_with_added_args(request, redirect_params),
            forward_querystring=False,
        )

    # If ?_sort_by_desc=on (from checkbox) redirect to _sort_desc=(_sort)
    if "_sort_by_desc" in request.args:
        return self.redirect(
            request,
            path_with_added_args(
                request,
                {
                    "_sort_desc": request.args.get("_sort"),
                    "_sort_by_desc": None,
                    "_sort": None,
                },
            ),
            forward_querystring=False,
        )

    # Introspect columns and primary keys for table
    pks = await db.primary_keys(table_name)
    table_columns = await db.table_columns(table_name)

    # Take ?_col= and ?_nocol= into account
    specified_columns = await self.columns_to_select(
        table_columns, pks, request)
    select_specified_columns = ", ".join(
        escape_sqlite(t) for t in specified_columns)
    select_all_columns = ", ".join(escape_sqlite(t) for t in table_columns)

    # rowid tables (no specified primary key) need a different SELECT
    use_rowid = not pks and not is_view
    if use_rowid:
        select_specified_columns = f"rowid, {select_specified_columns}"
        select_all_columns = f"rowid, {select_all_columns}"
        order_by = "rowid"
        order_by_pks = "rowid"
    else:
        order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
        order_by = order_by_pks

    if is_view:
        order_by = ""

    nocount = request.args.get("_nocount")
    nofacet = request.args.get("_nofacet")
    nosuggest = request.args.get("_nosuggest")

    # The array and object shapes always skip the count and facet queries
    if request.args.get("_shape") in ("array", "object"):
        nocount = True
        nofacet = True

    table_metadata = self.ds.table_metadata(database_name, table_name)
    units = table_metadata.get("units", {})

    # Arguments that start with _ and don't contain a __ are
    # special - things like ?_search= - and should not be
    # treated as filters.
    filter_args = []
    for key in request.args:
        if not (key.startswith("_") and "__" not in key):
            for v in request.args.getlist(key):
                filter_args.append((key, v))

    # Build where clauses from query string arguments
    filters = Filters(sorted(filter_args), units, ureg)
    where_clauses, params = filters.build_where_clauses(table_name)

    # Execute filters_from_request plugin hooks - including the default
    # ones that live in datasette/filters.py
    extra_context_from_filters = {}
    extra_human_descriptions = []

    for hook in pm.hook.filters_from_request(
            request=request,
            table=table_name,
            database=database_name,
            datasette=self.ds,
    ):
        filter_arguments = await await_me_maybe(hook)
        if filter_arguments:
            where_clauses.extend(filter_arguments.where_clauses)
            params.update(filter_arguments.params)
            extra_human_descriptions.extend(
                filter_arguments.human_descriptions)
            extra_context_from_filters.update(
                filter_arguments.extra_context)

    # Deal with custom sort orders
    sortable_columns = await self.sortable_columns_for_table(
        database_name, table_name, use_rowid)
    sort = request.args.get("_sort")
    sort_desc = request.args.get("_sort_desc")

    # With no sort in the query string, fall back to the table metadata
    if not sort and not sort_desc:
        sort = table_metadata.get("sort")
        sort_desc = table_metadata.get("sort_desc")

    if sort and sort_desc:
        raise DatasetteError(
            "Cannot use _sort and _sort_desc at the same time")

    if sort:
        if sort not in sortable_columns:
            raise DatasetteError(f"Cannot sort table by {sort}")

        order_by = escape_sqlite(sort)

    if sort_desc:
        if sort_desc not in sortable_columns:
            raise DatasetteError(f"Cannot sort table by {sort_desc}")

        order_by = f"{escape_sqlite(sort_desc)} desc"

    from_sql = "from {table_name} {where}".format(
        table_name=escape_sqlite(table_name),
        where=("where {} ".format(" and ".join(where_clauses)))
        if where_clauses else "",
    )
    # Copy of params so we can mutate them later:
    from_sql_params = dict(**params)

    count_sql = f"select count(*) {from_sql}"

    # Handle pagination driven by ?_next=
    _next = _next or request.args.get("_next")
    offset = ""
    if _next:
        sort_value = None
        if is_view:
            # _next is an offset
            offset = f" offset {int(_next)}"
        else:
            components = urlsafe_components(_next)
            # If a sort order is applied and there are multiple components,
            # the first of these is the sort value
            if (sort or sort_desc) and (len(components) > 1):
                sort_value = components[0]
                # Special case for if non-urlencoded first token was $null
                if _next.split(",")[0] == "$null":
                    sort_value = None
                components = components[1:]

            # Figure out the SQL for next-based-on-primary-key first
            next_by_pk_clauses = []
            if use_rowid:
                # The placeholder number is taken from len(params) before the
                # value is added, so the order of these two lines matters
                next_by_pk_clauses.append(f"rowid > :p{len(params)}")
                params[f"p{len(params)}"] = components[0]
            else:
                # Apply the tie-breaker based on primary keys
                if len(components) == len(pks):
                    param_len = len(params)
                    next_by_pk_clauses.append(
                        compound_keys_after_sql(pks, param_len))
                    for i, pk_value in enumerate(components):
                        params[f"p{param_len + i}"] = pk_value

            # Now add the sort SQL, which may incorporate next_by_pk_clauses
            if sort or sort_desc:
                if sort_value is None:
                    if sort_desc:
                        # Just items where column is null ordered by pk
                        where_clauses.append(
                            "({column} is null and {next_clauses})".format(
                                column=escape_sqlite(sort_desc),
                                next_clauses=" and ".join(
                                    next_by_pk_clauses),
                            ))
                    else:
                        where_clauses.append(
                            "({column} is not null or ({column} is null and {next_clauses}))"
                            .format(
                                column=escape_sqlite(sort),
                                next_clauses=" and ".join(
                                    next_by_pk_clauses),
                            ))
                else:
                    where_clauses.append(
                        "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))"
                        .format(
                            column=escape_sqlite(sort or sort_desc),
                            op=">" if sort else "<",
                            p=len(params),
                            extra_desc_only="" if sort else
                            " or {column2} is null".format(
                                column2=escape_sqlite(sort or sort_desc)),
                            next_clauses=" and ".join(next_by_pk_clauses),
                        ))
                    params[f"p{len(params)}"] = sort_value
                order_by = f"{order_by}, {order_by_pks}"
            else:
                where_clauses.extend(next_by_pk_clauses)

    where_clause = ""
    if where_clauses:
        where_clause = f"where {' and '.join(where_clauses)} "

    if order_by:
        order_by = f"order by {order_by}"

    extra_args = {}
    # Handle ?_size=500
    page_size = _size or request.args.get("_size") or table_metadata.get(
        "size")
    if page_size:
        if page_size == "max":
            page_size = self.ds.max_returned_rows
        try:
            page_size = int(page_size)
            if page_size < 0:
                raise ValueError
        except ValueError:
            raise BadRequest("_size must be a positive integer")

        if page_size > self.ds.max_returned_rows:
            raise BadRequest(
                f"_size must be <= {self.ds.max_returned_rows}")

        extra_args["page_size"] = page_size
    else:
        page_size = self.ds.page_size

    # Facets are calculated against SQL without order by or limit
    sql_no_order_no_limit = (
        "select {select_all_columns} from {table_name} {where}".format(
            select_all_columns=select_all_columns,
            table_name=escape_sqlite(table_name),
            where=where_clause,
        ))

    # This is the SQL that populates the main table on the page
    # (one row more than page_size is requested so that the presence of a
    # following page can be detected)
    sql = "select {select_specified_columns} from {table_name} {where}{order_by} limit {page_size}{offset}".format(
        select_specified_columns=select_specified_columns,
        table_name=escape_sqlite(table_name),
        where=where_clause,
        order_by=order_by,
        page_size=page_size + 1,
        offset=offset,
    )

    if request.args.get("_timelimit"):
        extra_args["custom_time_limit"] = int(
            request.args.get("_timelimit"))

    # Execute the main query!
    results = await db.execute(sql, params, truncate=True, **extra_args)

    # Calculate the total count for this query
    filtered_table_rows_count = None
    if (not db.is_mutable and self.ds.inspect_data
            and count_sql == f"select count(*) from {table_name} "):
        # We can use a previously cached table row count
        try:
            filtered_table_rows_count = self.ds.inspect_data[
                database_name]["tables"][table_name]["count"]
        except KeyError:
            pass

    # Otherwise run a select count(*) ...
    if count_sql and filtered_table_rows_count is None and not nocount:
        try:
            count_rows = list(await db.execute(count_sql, from_sql_params))
            filtered_table_rows_count = count_rows[0][0]
        except QueryInterrupted:
            # Count timed out: filtered_table_rows_count stays None
            pass

    # Faceting
    if not self.ds.setting("allow_facet") and any(
            arg.startswith("_facet") for arg in request.args):
        raise BadRequest("_facet= is not allowed")

    # pylint: disable=no-member
    facet_classes = list(
        itertools.chain.from_iterable(pm.hook.register_facet_classes()))
    facet_results = {}
    facets_timed_out = []
    facet_instances = []
    for klass in facet_classes:
        facet_instances.append(
            klass(
                self.ds,
                request,
                database_name,
                sql=sql_no_order_no_limit,
                params=params,
                table=table_name,
                metadata=table_metadata,
                row_count=filtered_table_rows_count,
            ))

    async def execute_facets():
        # Fills facet_results and facets_timed_out in the enclosing scope
        if not nofacet:
            # Run them in parallel
            facet_awaitables = [
                facet.facet_results() for facet in facet_instances
            ]
            facet_awaitable_results = await gather(*facet_awaitables)
            for (
                    instance_facet_results,
                    instance_facets_timed_out,
            ) in facet_awaitable_results:
                for facet_info in instance_facet_results:
                    base_key = facet_info["name"]
                    key = base_key
                    i = 1
                    # Append _2, _3, ... until the key is not already taken
                    while key in facet_results:
                        i += 1
                        key = f"{base_key}_{i}"
                    facet_results[key] = facet_info
                facets_timed_out.extend(instance_facets_timed_out)

    suggested_facets = []

    async def execute_suggested_facets():
        # Calculate suggested facets
        if (self.ds.setting("suggest_facets")
                and self.ds.setting("allow_facet") and not _next
                and not nofacet and not nosuggest):
            # Run them in parallel
            facet_suggest_awaitables = [
                facet.suggest() for facet in facet_instances
            ]
            for suggest_result in await gather(*facet_suggest_awaitables):
                suggested_facets.extend(suggest_result)

    await gather(execute_facets(), execute_suggested_facets())

    # Figure out columns and rows for the query
    columns = [r[0] for r in results.description]
    rows = list(results.rows)

    # Expand labeled columns if requested
    expanded_columns = []
    expandable_columns = await self.expandable_columns(
        database_name, table_name)
    columns_to_expand = None
    try:
        all_labels = value_as_boolean(request.args.get("_labels", ""))
    except ValueError:
        all_labels = default_labels
    # Check for explicit _label=
    if "_label" in request.args:
        columns_to_expand = request.args.getlist("_label")
    if columns_to_expand is None and all_labels:
        # expand all columns with foreign keys
        columns_to_expand = [fk["column"] for fk, _ in expandable_columns]

    if columns_to_expand:
        expanded_labels = {}
        for fk, _ in expandable_columns:
            column = fk["column"]
            if column not in columns_to_expand:
                continue
            # Skip foreign key columns that are not in the query results
            if column not in columns:
                continue
            expanded_columns.append(column)
            # Gather the values
            column_index = columns.index(column)
            values = [row[column_index] for row in rows]
            # Expand them
            expanded_labels.update(await self.ds.expand_foreign_keys(
                database_name, table_name, column, values))
        if expanded_labels:
            # Rewrite the rows
            new_rows = []
            for row in rows:
                new_row = CustomRow(columns)
                for column in row.keys():
                    value = row[column]
                    if (column, value
                        ) in expanded_labels and value is not None:
                        new_row[column] = {
                            "value": value,
                            "label": expanded_labels[(column, value)],
                        }
                    else:
                        new_row[column] = value
                new_rows.append(new_row)
            rows = new_rows

    # Pagination next link
    next_value = None
    next_url = None
    if 0 < page_size < len(rows):
        # rows holds more than page_size entries here; rows[-2] is used as
        # the last row that will actually be displayed
        if is_view:
            next_value = int(_next or 0) + page_size
        else:
            next_value = path_from_row_pks(rows[-2], pks, use_rowid)
        # If there's a sort or sort_desc, add that value as a prefix
        if (sort or sort_desc) and not is_view:
            prefix = rows[-2][sort or sort_desc]
            # An expanded label cell is a dict; use its underlying value
            if isinstance(prefix, dict) and "value" in prefix:
                prefix = prefix["value"]
            if prefix is None:
                prefix = "$null"
            else:
                prefix = tilde_encode(str(prefix))
            next_value = f"{prefix},{next_value}"
            added_args = {"_next": next_value}
            if sort:
                added_args["_sort"] = sort
            else:
                added_args["_sort_desc"] = sort_desc
        else:
            added_args = {"_next": next_value}
        next_url = self.ds.absolute_url(
            request,
            self.ds.urls.path(path_with_replaced_args(request,
                                                      added_args)))
        rows = rows[:page_size]

    # human_description_en combines filters AND search, if provided
    human_description_en = filters.human_description_en(
        extra=extra_human_descriptions)

    if sort or sort_desc:
        sorted_by = "sorted by {}{}".format(
            (sort or sort_desc), " descending" if sort_desc else "")
        human_description_en = " ".join(
            [b for b in [human_description_en, sorted_by] if b])

    async def extra_template():
        # Builds the extra context used only for HTML rendering; may rebind
        # the enclosing sort variable (see below)
        nonlocal sort

        display_columns, display_rows = await display_columns_and_rows(
            self.ds,
            database_name,
            table_name,
            results.description,
            rows,
            link_column=not is_view,
            truncate_cells=self.ds.setting("truncate_cells_html"),
            sortable_columns=await
            self.sortable_columns_for_table(database_name,
                                            table_name,
                                            use_rowid=True),
        )
        metadata = ((self.ds.metadata("databases")
                     or {}).get(database_name,
                                {}).get("tables", {}).get(table_name, {}))
        self.ds.update_with_inherited_metadata(metadata)

        # Collect the special (_-prefixed) arguments other than
        # _sort, _search and _next
        form_hidden_args = []
        for key in request.args:
            if (key.startswith("_")
                    and key not in ("_sort", "_search", "_next")
                    and "__" not in key):
                for value in request.args.getlist(key):
                    form_hidden_args.append((key, value))

        # if no sort specified AND table has a single primary key,
        # set sort to that so arrow is displayed
        if not sort and not sort_desc:
            if 1 == len(pks):
                sort = pks[0]
            elif use_rowid:
                sort = "rowid"

        async def table_actions():
            # Gather links from every table_actions plugin hook
            links = []
            for hook in pm.hook.table_actions(
                    datasette=self.ds,
                    table=table_name,
                    database=database_name,
                    actor=request.actor,
                    request=request,
            ):
                extra_links = await await_me_maybe(hook)
                if extra_links:
                    links.extend(extra_links)
            return links

        # filter_columns combine the columns we know are available
        # in the table with any additional columns (such as rowid)
        # which are available in the query
        filter_columns = list(columns) + [
            table_column for table_column in table_columns
            if table_column not in columns
        ]
        d = {
            "table_actions": table_actions,
            "use_rowid": use_rowid,
            "filters": filters,
            "display_columns": display_columns,
            "filter_columns": filter_columns,
            "display_rows": display_rows,
            "facets_timed_out": facets_timed_out,
            "sorted_facet_results": sorted(
                facet_results.values(),
                key=lambda f: (len(f["results"]), f["name"]),
                reverse=True,
            ),
            "form_hidden_args": form_hidden_args,
            "is_sortable": any(c["sortable"] for c in display_columns),
            "fix_path": self.ds.urls.path,
            "path_with_replaced_args": path_with_replaced_args,
            "path_with_removed_args": path_with_removed_args,
            "append_querystring": append_querystring,
            "request": request,
            "sort": sort,
            "sort_desc": sort_desc,
            "disable_sort": is_view,
            "custom_table_templates": [
                f"_table-{to_css_class(database_name)}-{to_css_class(table_name)}.html",
                f"_table-table-{to_css_class(database_name)}-{to_css_class(table_name)}.html",
                "_table.html",
            ],
            "metadata": metadata,
            "view_definition": await db.get_view_definition(table_name),
            "table_definition": await db.get_table_definition(table_name),
            "datasette_allow_facet": "true"
            if self.ds.setting("allow_facet") else "false",
        }
        # Context supplied by filters_from_request hooks overrides the above
        d.update(extra_context_from_filters)
        return d

    return (
        {
            "database": database_name,
            "table": table_name,
            "is_view": is_view,
            "human_description_en": human_description_en,
            "rows": rows[:page_size],
            "truncated": results.truncated,
            "filtered_table_rows_count": filtered_table_rows_count,
            "expanded_columns": expanded_columns,
            "expandable_columns": expandable_columns,
            "columns": columns,
            "primary_keys": pks,
            "units": units,
            "query": {
                "sql": sql,
                "params": params
            },
            "facet_results": facet_results,
            "suggested_facets": suggested_facets,
            "next": next_value and str(next_value) or None,
            "next_url": next_url,
            "private": private,
            "allow_execute_sql": await
            self.ds.permission_allowed(request.actor,
                                       "execute-sql",
                                       database_name,
                                       default=True),
        },
        extra_template,
        (
            f"table-{to_css_class(database_name)}-{to_css_class(table_name)}.html",
            "table.html",
        ),
    )
async def facet_results(self):
    """Run one grouped ``date(column)`` count query per configured date facet.

    Returns a two-item tuple: a dict keyed by facet column holding that
    facet's description (name, type, value rows with toggle URLs, and a
    truncation flag), and a list of the columns whose query raised
    ``InterruptedError``.
    """
    results_by_column = {}
    timed_out_columns = []
    querystring = dict(self.get_querystring_pairs())
    max_values = self.ds.config("default_facet_size")

    for entry in self.get_configs():
        facet_config, origin = entry["config"], entry["source"]
        column = facet_config.get("column") or facet_config["simple"]
        # Querystring key whose current value decides which row is "selected".
        filter_key = "{}__date".format(column)
        # TODO: does this query break if inner sql produces value or count columns?
        # One row more than max_values is requested so truncation is detectable.
        date_sql = """ select date({col}) as value, count(*) as count from ( {sql} ) where date({col}) is not null group by date({col}) order by count desc limit {limit} """.format(
            col=escape_sqlite(column), sql=self.sql, limit=max_values + 1
        )
        try:
            outcome = await self.ds.execute(
                self.database,
                date_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.config("facet_time_limit_ms"),
            )
            value_entries = []
            # The entry is registered before the rows are walked; the list it
            # references is filled in place below.
            results_by_column[column] = {
                "name": column,
                "type": self.type,
                "results": value_entries,
                "hideable": origin != "metadata",
                "toggle_url": path_with_removed_args(
                    self.request, {"_facet_date": column}
                ),
                "truncated": len(outcome) > max_values,
            }
            for row in outcome.rows[:max_values]:
                date_value = row["value"]
                is_selected = str(querystring.get(filter_key)) == str(date_value)
                # A selected value links to the URL without it; an unselected
                # value links to the URL with it added.
                toggle_path = (
                    path_with_removed_args(
                        self.request, {filter_key: str(date_value)}
                    )
                    if is_selected
                    else path_with_added_args(
                        self.request, {filter_key: date_value}
                    )
                )
                value_entries.append({
                    "value": date_value,
                    "label": date_value,
                    "count": row["count"],
                    "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                    "selected": is_selected,
                })
        except InterruptedError:
            timed_out_columns.append(column)

    return results_by_column, timed_out_columns
async def view_get(self, request, name, hash, **kwargs):
    """Handle a GET request by rendering the result of ``self.data()``.

    Args:
        request: The incoming request; its ``args`` and ``path`` are read.
        name: Passed through to ``self.data()``; also reported as
            ``"database"`` in the ``_shape=object`` error payload.
        hash: Passed through to ``self.data()``; also reported as
            ``"database_hash"`` in the ``_shape=object`` error payload.
        **kwargs: ``as_json`` (optional, default ``False``) is consumed here;
            everything else is forwarded to ``self.data()``.

    Returns:
        Whatever ``self.data()`` returned if that is already an
        ``HTTPResponse``; a redirect when ``as_json == ".jsono"``; a JSON
        ``HTTPResponse`` when ``as_json`` is otherwise truthy; else the
        result of ``self.render()``.

    Raises:
        DatasetteError: With status 400 when ``self.data()`` raises
            ``InterruptedError``, ``sqlite3.OperationalError`` or
            ``InvalidSql``. A ``DatasetteError`` raised by ``self.data()``
            propagates unchanged.
    """
    as_json = kwargs.pop("as_json", False)
    extra_template_data = {}
    start = time.time()
    status_code = 200
    templates = []
    try:
        response_or_template_contexts = await self.data(
            request, name, hash, **kwargs
        )
        if isinstance(response_or_template_contexts, response.HTTPResponse):
            return response_or_template_contexts
        data, extra_template_data, templates = response_or_template_contexts
    except InterruptedError as e:
        raise DatasetteError(str(e), title="SQL Interrupted", status=400)
    except (sqlite3.OperationalError, InvalidSql) as e:
        # This clause already covers sqlite3.OperationalError, so a second
        # handler for the same exception type could never be reached.
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    end = time.time()
    data["query_ms"] = (end - start) * 1000
    # Copy the non-empty attribution fields from the instance metadata.
    for key in ("source", "source_url", "license", "license_url"):
        value = self.ds.metadata.get(key)
        if value:
            data[key] = value

    if as_json:
        # Special case for .jsono extension - redirect to _shape=objects
        if as_json == ".jsono":
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {"_shape": "objects"},
                    path=request.path.rsplit(".jsono", 1)[0] + ".json",
                ),
                forward_querystring=False,
            )
        # Deal with the _shape option
        shape = request.args.get("_shape", "arrays")
        if shape in ("objects", "object", "array"):
            columns = data.get("columns")
            rows = data.get("rows")
            if rows and columns:
                # Turn each row into a {column: value} dict.
                data["rows"] = [dict(zip(columns, row)) for row in rows]
            if shape == "object":
                error = None
                if "primary_keys" not in data:
                    error = "_shape=object is only available on tables"
                else:
                    pks = data["primary_keys"]
                    if not pks:
                        error = "_shape=object not available for tables with no primary keys"
                    else:
                        # Key every row by its primary-key path string.
                        # pks is non-empty here, so `not pks` is False.
                        object_rows = {}
                        for row in data["rows"]:
                            pk_string = path_from_row_pks(row, pks, not pks)
                            object_rows[pk_string] = row
                        data = object_rows
                if error:
                    # Note: the HTTP status stays at its default for this error.
                    data = {
                        "ok": False,
                        "error": error,
                        "database": name,
                        "database_hash": hash,
                    }
            elif shape == "array":
                data = data["rows"]
        elif shape == "arrays":
            pass
        else:
            status_code = 400
            data = {
                "ok": False,
                "error": "Invalid _shape: {}".format(shape),
                "status": 400,
                "title": None,
            }
        headers = {}
        if self.ds.cors:
            headers["Access-Control-Allow-Origin"] = "*"
        r = response.HTTPResponse(
            json.dumps(data, cls=CustomJSONEncoder),
            status=status_code,
            content_type="application/json",
            headers=headers,
        )
    else:
        # extra_template_data may be a plain mapping, a function returning
        # one, or a function returning a coroutine that resolves to one.
        if callable(extra_template_data):
            extras = extra_template_data()
            if asyncio.iscoroutine(extras):
                extras = await extras
        else:
            extras = extra_template_data
        context = {
            **data,
            **extras,
            **{
                "url_json": path_with_ext(request, ".json"),
                "url_jsono": path_with_ext(request, ".jsono"),
                "extra_css_urls": self.ds.extra_css_urls(),
                "extra_js_urls": self.ds.extra_js_urls(),
                "datasette_version": __version__,
            },
        }
        if "metadata" not in context:
            context["metadata"] = self.ds.metadata
        r = self.render(templates, **context)
        r.status = status_code
    # Set far-future cache expiry
    if self.ds.cache_headers:
        r.headers["Cache-Control"] = "max-age={}".format(365 * 24 * 60 * 60)
    return r
async def facet_results(self):
    """Compute many-to-many facet counts for every configured destination table.

    For each config, a "middle" table is looked up: one with a foreign key
    to ``self.table``, a foreign key to the destination table, and exactly
    two outgoing foreign keys in total. Rows of ``self.sql`` are then
    counted per destination value through that middle table.

    Returns:
        A ``(facet_results, facets_timed_out)`` tuple. ``facet_results`` is
        always a dict keyed by destination table name (empty when nothing
        could be computed); ``facets_timed_out`` is a list of destination
        tables whose query raised ``InterruptedError``.

        A config for which no middle table can be found is skipped, so it
        no longer discards the results or timeouts gathered for the other
        configs.
    """
    facet_results = {}
    facets_timed_out = []
    args = set(self.get_querystring_pairs())
    facet_size = self.ds.config("default_facet_size")
    all_foreign_keys = await self.ds.execute_against_connection_in_thread(
        self.database, get_all_foreign_keys
    )
    if not all_foreign_keys.get(self.table):
        # Same (dict, list) shape as the normal return below.
        return facet_results, facets_timed_out
    # We care about three tables: self.table, middle_table and destination_table
    incoming = all_foreign_keys[self.table]["incoming"]
    for source_and_config in self.get_configs():
        config = source_and_config["config"]
        source = source_and_config["source"]
        # The destination_table is specified in the _facet_m2m=xxx parameter
        destination_table = config.get("column") or config["simple"]
        # Find middle table - it has fks to self.table AND destination_table
        fks = None
        middle_table = None
        for fk in incoming:
            other_table = fk["other_table"]
            other_table_outgoing_foreign_keys = all_foreign_keys[other_table][
                "outgoing"
            ]
            if (
                any(
                    o
                    for o in other_table_outgoing_foreign_keys
                    if o["other_table"] == destination_table
                )
                and len(other_table_outgoing_foreign_keys) == 2
            ):
                fks = other_table_outgoing_foreign_keys
                middle_table = other_table
                break
        if middle_table is None or fks is None:
            # Nothing links self.table to this destination: skip only this
            # facet and keep whatever the other configs produce.
            continue
        # Now that we have determined the middle_table, we need to figure out the three
        # columns on that table which are relevant to us. These are:
        #   column_to_table - the middle_table column with a foreign key to self.table
        #   table_pk - the primary key column on self.table that is referenced
        #   column_to_destination - the column with a foreign key to destination_table
        #
        # It turns out we don't actually need the fourth obvious column:
        #   destination_pk = the primary key column on destination_table which is referenced
        #
        # These are both in the fks array - which now contains 2 foreign key relationships, e.g:
        # [
        #   {'other_table': 'characteristic', 'column': 'characteristic_id', 'other_column': 'pk'},
        #   {'other_table': 'attractions', 'column': 'attraction_id', 'other_column': 'pk'}
        # ]
        column_to_table = None
        table_pk = None
        column_to_destination = None
        for fk in fks:
            if fk["other_table"] == self.table:
                table_pk = fk["other_column"]
                column_to_table = fk["column"]
            elif fk["other_table"] == destination_table:
                column_to_destination = fk["column"]
        assert all((column_to_table, table_pk, column_to_destination))
        # limit is facet_size + 1 so that truncation can be detected below.
        facet_sql = """ select {middle_table}.{column_to_destination} as value, count(distinct {middle_table}.{column_to_table}) as count from {middle_table} where {middle_table}.{column_to_table} in ( select {table_pk} from ({sql}) ) group by {middle_table}.{column_to_destination} order by count desc limit {limit} """.format(
            sql=self.sql,
            limit=facet_size + 1,
            middle_table=escape_sqlite(middle_table),
            column_to_destination=escape_sqlite(column_to_destination),
            column_to_table=escape_sqlite(column_to_table),
            table_pk=escape_sqlite(table_pk),
        )
        try:
            facet_rows_results = await self.ds.execute(
                self.database,
                facet_sql,
                self.params,
                truncate=False,
                custom_time_limit=self.ds.config("facet_time_limit_ms"),
            )
            facet_results_values = []
            facet_results[destination_table] = {
                "name": destination_table,
                "type": self.type,
                "results": facet_results_values,
                "hideable": source != "metadata",
                "toggle_url": path_with_removed_args(
                    self.request, {"_facet_m2m": destination_table}
                ),
                "truncated": len(facet_rows_results) > facet_size,
            }
            facet_rows = facet_rows_results.rows[:facet_size]
            # Attempt to expand foreign keys into labels
            values = [row["value"] for row in facet_rows]
            expanded = await self.ds.expand_foreign_keys(
                self.database, middle_table, column_to_destination, values
            )
            for row in facet_rows:
                # Compact JSON with sorted keys, so the string is stable and
                # can be matched against the ("_through", value) pairs in args.
                through = json.dumps(
                    {
                        "table": middle_table,
                        "column": column_to_destination,
                        "value": str(row["value"]),
                    },
                    separators=(",", ":"),
                    sort_keys=True,
                )
                selected = ("_through", through) in args
                if selected:
                    toggle_path = path_with_removed_args(
                        self.request, {"_through": through}
                    )
                else:
                    toggle_path = path_with_added_args(
                        self.request, {"_through": through}
                    )
                facet_results_values.append({
                    "value": row["value"],
                    # Fall back to the raw value when no label was expanded.
                    "label": expanded.get(
                        (column_to_destination, row["value"]), row["value"]
                    ),
                    "count": row["count"],
                    "toggle_url": self.ds.absolute_url(self.request, toggle_path),
                    "selected": selected,
                })
        except InterruptedError:
            facets_timed_out.append(destination_table)
    return facet_results, facets_timed_out
def test_path_with_added_args(path, added_args, expected):
    """Check ``utils.path_with_added_args`` on a fake request for ``path``.

    The three arguments are presumably supplied by a parametrize decorator
    that is not visible here.
    """
    fake_request = Request.fake(path)
    assert expected == utils.path_with_added_args(fake_request, added_args)