async def render_blob(datasette, database, rows, columns, request, table, view_name): if _BLOB_COLUMN not in request.args: raise BadRequest(f"?{_BLOB_COLUMN}= is required") blob_column = request.args[_BLOB_COLUMN] if blob_column not in columns: raise BadRequest(f"{blob_column} is not a valid column") # If ?_blob_hash= provided, use that to select the row - otherwise use first row blob_hash = None if _BLOB_HASH in request.args: blob_hash = request.args[_BLOB_HASH] for row in rows: value = row[blob_column] if hashlib.sha256(value).hexdigest() == blob_hash: break else: # Loop did not break raise BadRequest( "Link has expired - the requested binary content has changed or could not be found." ) else: row = rows[0] value = row[blob_column] filename_bits = [] if table: filename_bits.append(to_css_class(table)) if "pk_path" in request.url_vars: filename_bits.append(request.url_vars["pk_path"]) filename_bits.append(to_css_class(blob_column)) if blob_hash: filename_bits.append(blob_hash[:6]) filename = "-".join(filename_bits) + ".blob" headers = { "X-Content-Type-Options": "nosniff", "Content-Disposition": f'attachment; filename="{filename}"', } return Response( body=value or b"", status=200, headers=headers, content_type="application/binary", )
async def data( self, request, database, hash, table, default_labels=False, _next=None, _size=None, ): canned_query = await self.ds.get_canned_query(database, table, request.actor) if canned_query: return await QueryView(self.ds).data( request, database, hash, canned_query["sql"], metadata=canned_query, editable=False, canned_query=table, named_parameters=canned_query.get("params"), write=bool(canned_query.get("write")), ) db = self.ds.databases[database] is_view = bool(await db.get_view_definition(table)) table_exists = bool(await db.table_exists(table)) if not is_view and not table_exists: raise NotFound(f"Table not found: {table}") await self.check_permissions( request, [ ("view-table", (database, table)), ("view-database", database), "view-instance", ], ) private = not await self.ds.permission_allowed( None, "view-table", (database, table), default=True) pks = await db.primary_keys(table) table_column_details = await db.table_column_details(table) table_columns = [column.name for column in table_column_details] select_columns = ", ".join(escape_sqlite(t) for t in table_columns) use_rowid = not pks and not is_view if use_rowid: select = f"rowid, {select_columns}" order_by = "rowid" order_by_pks = "rowid" else: select = select_columns order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) order_by = order_by_pks if is_view: order_by = "" # Ensure we don't drop anything with an empty value e.g. ?name__exact= args = MultiParams( urllib.parse.parse_qs(request.query_string, keep_blank_values=True)) # Special args start with _ and do not contain a __ # That's so if there is a column that starts with _ # it can still be queried using ?_col__exact=blah special_args = {} other_args = [] for key in args: if key.startswith("_") and "__" not in key: special_args[key] = args[key] else: for v in args.getlist(key): other_args.append((key, v)) # Handle ?_filter_column and redirect, if present redirect_params = filters_should_redirect(special_args) if redirect_params: return self.redirect( request, path_with_added_args(request, redirect_params), forward_querystring=False, ) # If ?_sort_by_desc=on (from checkbox) redirect to _sort_desc=(_sort) if "_sort_by_desc" in special_args: return self.redirect( request, path_with_added_args( request, { "_sort_desc": special_args.get("_sort"), "_sort_by_desc": None, "_sort": None, }, ), forward_querystring=False, ) table_metadata = self.ds.table_metadata(database, table) units = table_metadata.get("units", {}) filters = Filters(sorted(other_args), units, ureg) where_clauses, params = filters.build_where_clauses(table) extra_wheres_for_ui = [] # Add _where= from querystring if "_where" in request.args: if not await self.ds.permission_allowed( request.actor, "execute-sql", resource=database, default=True, ): raise DatasetteError("_where= is not allowed", status=403) else: where_clauses.extend(request.args.getlist("_where")) extra_wheres_for_ui = [{ "text": text, "remove_url": path_with_removed_args(request, {"_where": text}), } for text in request.args.getlist("_where")] # Support for ?_through={table, column, value} extra_human_descriptions = [] if "_through" in request.args: for through in request.args.getlist("_through"): through_data = json.loads(through) through_table = through_data["table"] other_column = through_data["column"] value = through_data["value"] outgoing_foreign_keys = await db.foreign_keys_for_table( through_table) try: fk_to_us = [ fk for fk in outgoing_foreign_keys if fk["other_table"] == table ][0] except IndexError: raise DatasetteError( "Invalid _through - could not find corresponding foreign key" ) param = f"p{len(params)}" where_clauses.append( "{our_pk} in (select {our_column} from {through_table} where {other_column} = :{param})" .format( through_table=escape_sqlite(through_table), our_pk=escape_sqlite(fk_to_us["other_column"]), our_column=escape_sqlite(fk_to_us["column"]), other_column=escape_sqlite(other_column), param=param, )) params[param] = value extra_human_descriptions.append( f'{through_table}.{other_column} = "{value}"') # _search support: fts_table = special_args.get("_fts_table") fts_table = fts_table or table_metadata.get("fts_table") fts_table = fts_table or await db.fts_table(table) fts_pk = special_args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) search_args = dict(pair for pair in special_args.items() if pair[0].startswith("_search")) search = "" search_mode_raw = special_args.get("_searchmode") == "raw" if fts_table and search_args: if "_search" in search_args: # Simple ?_search=xxx search = search_args["_search"] where_clauses.append( "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})" .format( fts_table=escape_sqlite(fts_table), fts_pk=escape_sqlite(fts_pk), match_clause=":search" if search_mode_raw else "escape_fts(:search)", )) extra_human_descriptions.append(f'search matches "{search}"') params["search"] = search else: # More complex: search against specific columns for i, (key, search_text) in enumerate(search_args.items()): search_col = key.split("_search_", 1)[1] if search_col not in await db.table_columns(fts_table): raise BadRequest("Cannot search by that column") where_clauses.append( "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})" .format( fts_table=escape_sqlite(fts_table), search_col=escape_sqlite(search_col), match_clause=":search_{}".format(i) if search_mode_raw else "escape_fts(:search_{})".format(i), )) extra_human_descriptions.append( f'search column "{search_col}" matches "{search_text}"' ) params[f"search_{i}"] = search_text sortable_columns = set() sortable_columns = await self.sortable_columns_for_table( database, table, use_rowid) # Allow for custom sort order sort = special_args.get("_sort") sort_desc = special_args.get("_sort_desc") if not sort and not sort_desc: sort = table_metadata.get("sort") sort_desc = table_metadata.get("sort_desc") if sort and sort_desc: raise DatasetteError( "Cannot use _sort and _sort_desc at the same time") if sort: if sort not in sortable_columns: raise DatasetteError(f"Cannot sort table by {sort}") order_by = escape_sqlite(sort) if sort_desc: if sort_desc not in sortable_columns: raise DatasetteError(f"Cannot sort table by {sort_desc}") order_by = f"{escape_sqlite(sort_desc)} desc" from_sql = "from {table_name} {where}".format( table_name=escape_sqlite(table), where=("where {} ".format(" and ".join(where_clauses))) if where_clauses else "", ) # Copy of params so we can mutate them later: from_sql_params = dict(**params) count_sql = f"select count(*) {from_sql}" _next = _next or special_args.get("_next") offset = "" if _next: if is_view: # _next is an offset offset = f" offset {int(_next)}" else: components = urlsafe_components(_next) # If a sort order is applied, the first of these is the sort value if sort or sort_desc: sort_value = components[0] # Special case for if non-urlencoded first token was $null if _next.split(",")[0] == "$null": sort_value = None components = components[1:] # Figure out the SQL for next-based-on-primary-key first next_by_pk_clauses = [] if use_rowid: next_by_pk_clauses.append(f"rowid > :p{len(params)}") params[f"p{len(params)}"] = components[0] else: # Apply the tie-breaker based on primary keys if len(components) == len(pks): param_len = len(params) next_by_pk_clauses.append( compound_keys_after_sql(pks, param_len)) for i, pk_value in enumerate(components): params[f"p{param_len + i}"] = pk_value # Now add the sort SQL, which may incorporate next_by_pk_clauses if sort or sort_desc: if sort_value is None: if sort_desc: # Just items where column is null ordered by pk where_clauses.append( "({column} is null and {next_clauses})".format( column=escape_sqlite(sort_desc), next_clauses=" and ".join( next_by_pk_clauses), )) else: where_clauses.append( "({column} is not null or ({column} is null and {next_clauses}))" .format( column=escape_sqlite(sort), next_clauses=" and ".join( next_by_pk_clauses), )) else: where_clauses.append( "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))" .format( column=escape_sqlite(sort or sort_desc), op=">" if sort else "<", p=len(params), extra_desc_only="" if sort else " or {column2} is null".format( column2=escape_sqlite(sort or sort_desc)), next_clauses=" and ".join(next_by_pk_clauses), )) params[f"p{len(params)}"] = sort_value order_by = f"{order_by}, {order_by_pks}" else: where_clauses.extend(next_by_pk_clauses) where_clause = "" if where_clauses: where_clause = f"where {' and '.join(where_clauses)} " if order_by: order_by = f"order by {order_by} " extra_args = {} # Handle ?_size=500 page_size = _size or request.args.get("_size") or table_metadata.get( "size") if page_size: if page_size == "max": page_size = self.ds.max_returned_rows try: page_size = int(page_size) if page_size < 0: raise ValueError except ValueError: raise BadRequest("_size must be a positive integer") if page_size > self.ds.max_returned_rows: raise BadRequest( f"_size must be <= {self.ds.max_returned_rows}") extra_args["page_size"] = page_size else: page_size = self.ds.page_size sql_no_limit = "select {select} from {table_name} {where}{order_by}".format( select=select, table_name=escape_sqlite(table), where=where_clause, order_by=order_by, ) sql = f"{sql_no_limit.rstrip()} limit {page_size + 1}{offset}" if request.args.get("_timelimit"): extra_args["custom_time_limit"] = int( request.args.get("_timelimit")) results = await db.execute(sql, params, truncate=True, **extra_args) # Number of filtered rows in whole set: filtered_table_rows_count = None if (not db.is_mutable and self.ds.inspect_data and count_sql == f"select count(*) from {table} "): try: filtered_table_rows_count = self.ds.inspect_data[database][ "tables"][table]["count"] except KeyError: pass if count_sql and filtered_table_rows_count is None: try: count_rows = list(await db.execute(count_sql, from_sql_params)) filtered_table_rows_count = count_rows[0][0] except QueryInterrupted: pass # facets support if not self.ds.config("allow_facet") and any( arg.startswith("_facet") for arg in request.args): raise BadRequest("_facet= is not allowed") # pylint: disable=no-member facet_classes = list( itertools.chain.from_iterable(pm.hook.register_facet_classes())) facet_results = {} facets_timed_out = [] facet_instances = [] for klass in facet_classes: facet_instances.append( klass( self.ds, request, database, sql=sql_no_limit, params=params, table=table, metadata=table_metadata, row_count=filtered_table_rows_count, )) for facet in facet_instances: ( instance_facet_results, instance_facets_timed_out, ) = await facet.facet_results() facet_results.update(instance_facet_results) facets_timed_out.extend(instance_facets_timed_out) # Figure out columns and rows for the query columns = [r[0] for r in results.description] rows = list(results.rows) # Expand labeled columns if requested expanded_columns = [] expandable_columns = await self.expandable_columns(database, table) columns_to_expand = None try: all_labels = value_as_boolean(special_args.get("_labels", "")) except ValueError: all_labels = default_labels # Check for explicit _label= if "_label" in request.args: columns_to_expand = request.args.getlist("_label") if columns_to_expand is None and all_labels: # expand all columns with foreign keys columns_to_expand = [fk["column"] for fk, _ in expandable_columns] if columns_to_expand: expanded_labels = {} for fk, _ in expandable_columns: column = fk["column"] if column not in columns_to_expand: continue expanded_columns.append(column) # Gather the values column_index = columns.index(column) values = [row[column_index] for row in rows] # Expand them expanded_labels.update(await self.ds.expand_foreign_keys( database, table, column, values)) if expanded_labels: # Rewrite the rows new_rows = [] for row in rows: new_row = CustomRow(columns) for column in row.keys(): value = row[column] if (column, value ) in expanded_labels and value is not None: new_row[column] = { "value": value, "label": expanded_labels[(column, value)], } else: new_row[column] = value new_rows.append(new_row) rows = new_rows # Pagination next link next_value = None next_url = None if len(rows) > page_size and page_size > 0: if is_view: next_value = int(_next or 0) + page_size else: next_value = path_from_row_pks(rows[-2], pks, use_rowid) # If there's a sort or sort_desc, add that value as a prefix if (sort or sort_desc) and not is_view: prefix = rows[-2][sort or sort_desc] if isinstance(prefix, dict) and "value" in prefix: prefix = prefix["value"] if prefix is None: prefix = "$null" else: prefix = urllib.parse.quote_plus(str(prefix)) next_value = f"{prefix},{next_value}" added_args = {"_next": next_value} if sort: added_args["_sort"] = sort else: added_args["_sort_desc"] = sort_desc else: added_args = {"_next": next_value} next_url = self.ds.absolute_url( request, path_with_replaced_args(request, added_args)) rows = rows[:page_size] # Detect suggested facets suggested_facets = [] if (self.ds.config("suggest_facets") and self.ds.config("allow_facet") and not _next): for facet in facet_instances: suggested_facets.extend(await facet.suggest()) # human_description_en combines filters AND search, if provided human_description_en = filters.human_description_en( extra=extra_human_descriptions) if sort or sort_desc: sorted_by = "sorted by {}{}".format( (sort or sort_desc), " descending" if sort_desc else "") human_description_en = " ".join( [b for b in [human_description_en, sorted_by] if b]) async def extra_template(): nonlocal sort display_columns, display_rows = await self.display_columns_and_rows( database, table, results.description, rows, link_column=not is_view, truncate_cells=self.ds.config("truncate_cells_html"), ) metadata = ((self.ds.metadata("databases") or {}).get(database, {}).get("tables", {}).get(table, {})) self.ds.update_with_inherited_metadata(metadata) form_hidden_args = [] # Add currently selected facets for arg in special_args: if arg == "_facet" or arg.startswith("_facet_"): form_hidden_args.extend( (arg, item) for item in request.args.getlist(arg)) for arg in ("_fts_table", "_fts_pk"): if arg in special_args: form_hidden_args.append((arg, special_args[arg])) if request.args.get("_where"): for where_text in request.args.getlist("_where"): form_hidden_args.append(("_where", where_text)) # if no sort specified AND table has a single primary key, # set sort to that so arrow is displayed if not sort and not sort_desc: if 1 == len(pks): sort = pks[0] elif use_rowid: sort = "rowid" async def table_actions(): links = [] for hook in pm.hook.table_actions( datasette=self.ds, table=table, database=database, actor=request.actor, ): extra_links = await await_me_maybe(hook) if extra_links: links.extend(extra_links) return links return { "table_actions": table_actions, "supports_search": bool(fts_table), "search": search or "", "use_rowid": use_rowid, "filters": filters, "display_columns": display_columns, "filter_columns": columns, "display_rows": display_rows, "facets_timed_out": facets_timed_out, "sorted_facet_results": sorted( facet_results.values(), key=lambda f: (len(f["results"]), f["name"]), reverse=True, ), "extra_wheres_for_ui": extra_wheres_for_ui, "form_hidden_args": form_hidden_args, "is_sortable": any(c["sortable"] for c in display_columns), "path_with_replaced_args": path_with_replaced_args, "path_with_removed_args": path_with_removed_args, "append_querystring": append_querystring, "request": request, "sort": sort, "sort_desc": sort_desc, "disable_sort": is_view, "custom_table_templates": [ f"_table-{to_css_class(database)}-{to_css_class(table)}.html", f"_table-table-{to_css_class(database)}-{to_css_class(table)}.html", "_table.html", ], "metadata": metadata, "view_definition": await db.get_view_definition(table), "table_definition": await db.get_table_definition(table), } return ( { "database": database, "table": table, "is_view": is_view, "human_description_en": human_description_en, "rows": rows[:page_size], "truncated": results.truncated, "filtered_table_rows_count": filtered_table_rows_count, "expanded_columns": expanded_columns, "expandable_columns": expandable_columns, "columns": columns, "primary_keys": pks, "units": units, "query": { "sql": sql, "params": params }, "facet_results": facet_results, "suggested_facets": suggested_facets, "next": next_value and str(next_value) or None, "next_url": next_url, "private": private, "allow_execute_sql": await self.ds.permission_allowed(request.actor, "execute-sql", database, default=True), }, extra_template, ( f"table-{to_css_class(database)}-{to_css_class(table)}.html", "table.html", ), )
async def as_csv(self, request, database, hash, **kwargs): stream = request.args.get("_stream") if stream: # Some quick sanity checks if not self.ds.setting("allow_csv_stream"): raise BadRequest("CSV streaming is disabled") if request.args.get("_next"): raise BadRequest("_next not allowed for CSV streaming") kwargs["_size"] = "max" # Fetch the first page try: response_or_template_contexts = await self.data( request, database, hash, **kwargs) if isinstance(response_or_template_contexts, Response): return response_or_template_contexts else: data, _, _ = response_or_template_contexts except (sqlite3.OperationalError, InvalidSql) as e: raise DatasetteError(str(e), title="Invalid SQL", status=400) except (sqlite3.OperationalError) as e: raise DatasetteError(str(e)) except DatasetteError: raise # Convert rows and columns to CSV headings = data["columns"] # if there are expanded_columns we need to add additional headings expanded_columns = set(data.get("expanded_columns") or []) if expanded_columns: headings = [] for column in data["columns"]: headings.append(column) if column in expanded_columns: headings.append(f"{column}_label") async def stream_fn(r): nonlocal data writer = csv.writer(LimitedWriter(r, self.ds.setting("max_csv_mb"))) first = True next = None while first or (next and stream): try: if next: kwargs["_next"] = next if not first: data, _, _ = await self.data(request, database, hash, **kwargs) if first: await writer.writerow(headings) first = False next = data.get("next") for row in data["rows"]: if any(isinstance(r, bytes) for r in row): new_row = [] for column, cell in zip(headings, row): if isinstance(cell, bytes): # If this is a table page, use .urls.row_blob() if data.get("table"): pks = data.get("primary_keys") or [] cell = self.ds.absolute_url( request, self.ds.urls.row_blob( database, data["table"], path_from_row_pks( row, pks, not pks), column, ), ) else: # Otherwise generate URL for this query cell = self.ds.absolute_url( request, path_with_format( request=request, format="blob", extra_qs={ "_blob_column": column, "_blob_hash": hashlib.sha256( cell).hexdigest(), }, replace_format="csv", ), ) new_row.append(cell) row = new_row if not expanded_columns: # Simple path await writer.writerow(row) else: # Look for {"value": "label": } dicts and expand new_row = [] for heading, cell in zip(data["columns"], row): if heading in expanded_columns: if cell is None: new_row.extend(("", "")) else: assert isinstance(cell, dict) new_row.append(cell["value"]) new_row.append(cell["label"]) else: new_row.append(cell) await writer.writerow(new_row) except Exception as e: print("caught this", e) await r.write(str(e)) return content_type = "text/plain; charset=utf-8" headers = {} if self.ds.cors: headers["Access-Control-Allow-Origin"] = "*" if request.args.get("_dl", None): content_type = "text/csv; charset=utf-8" disposition = 'attachment; filename="{}.csv"'.format( kwargs.get("table", database)) headers["content-disposition"] = disposition return AsgiStream(stream_fn, headers=headers, content_type=content_type)
async def as_csv(self, request, database): kwargs = {} stream = request.args.get("_stream") # Do not calculate facets or counts: extra_parameters = [ "{}=1".format(key) for key in ("_nofacet", "_nocount") if not request.args.get(key) ] if extra_parameters: # Replace request object with a new one with modified scope if not request.query_string: new_query_string = "&".join(extra_parameters) else: new_query_string = (request.query_string + "&" + "&".join(extra_parameters)) new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1")) receive = request.receive request = Request(new_scope, receive) if stream: # Some quick soundness checks if not self.ds.setting("allow_csv_stream"): raise BadRequest("CSV streaming is disabled") if request.args.get("_next"): raise BadRequest("_next not allowed for CSV streaming") kwargs["_size"] = "max" # Fetch the first page try: response_or_template_contexts = await self.data(request) if isinstance(response_or_template_contexts, Response): return response_or_template_contexts elif len(response_or_template_contexts) == 4: data, _, _, _ = response_or_template_contexts else: data, _, _ = response_or_template_contexts except (sqlite3.OperationalError, InvalidSql) as e: raise DatasetteError(str(e), title="Invalid SQL", status=400) except sqlite3.OperationalError as e: raise DatasetteError(str(e)) except DatasetteError: raise # Convert rows and columns to CSV headings = data["columns"] # if there are expanded_columns we need to add additional headings expanded_columns = set(data.get("expanded_columns") or []) if expanded_columns: headings = [] for column in data["columns"]: headings.append(column) if column in expanded_columns: headings.append(f"{column}_label") content_type = "text/plain; charset=utf-8" preamble = "" postamble = "" trace = request.args.get("_trace") if trace: content_type = "text/html; charset=utf-8" preamble = ("<html><head><title>CSV debug</title></head>" '<body><textarea style="width: 90%; height: 70vh">') postamble = "</textarea></body></html>" async def stream_fn(r): nonlocal data, trace limited_writer = LimitedWriter(r, self.ds.setting("max_csv_mb")) if trace: await limited_writer.write(preamble) writer = csv.writer(EscapeHtmlWriter(limited_writer)) else: writer = csv.writer(limited_writer) first = True next = None while first or (next and stream): try: kwargs = {} if next: kwargs["_next"] = next if not first: data, _, _ = await self.data(request, **kwargs) if first: if request.args.get("_header") != "off": await writer.writerow(headings) first = False next = data.get("next") for row in data["rows"]: if any(isinstance(r, bytes) for r in row): new_row = [] for column, cell in zip(headings, row): if isinstance(cell, bytes): # If this is a table page, use .urls.row_blob() if data.get("table"): pks = data.get("primary_keys") or [] cell = self.ds.absolute_url( request, self.ds.urls.row_blob( database, data["table"], path_from_row_pks( row, pks, not pks), column, ), ) else: # Otherwise generate URL for this query url = self.ds.absolute_url( request, path_with_format( request=request, format="blob", extra_qs={ "_blob_column": column, "_blob_hash": hashlib.sha256( cell).hexdigest(), }, replace_format="csv", ), ) cell = url.replace("&_nocount=1", "").replace( "&_nofacet=1", "") new_row.append(cell) row = new_row if not expanded_columns: # Simple path await writer.writerow(row) else: # Look for {"value": "label": } dicts and expand new_row = [] for heading, cell in zip(data["columns"], row): if heading in expanded_columns: if cell is None: new_row.extend(("", "")) else: assert isinstance(cell, dict) new_row.append(cell["value"]) new_row.append(cell["label"]) else: new_row.append(cell) await writer.writerow(new_row) except Exception as e: sys.stderr.write("Caught this error: {}\n".format(e)) sys.stderr.flush() await r.write(str(e)) return await limited_writer.write(postamble) headers = {} if self.ds.cors: add_cors_headers(headers) if request.args.get("_dl", None): if not trace: content_type = "text/csv; charset=utf-8" disposition = 'attachment; filename="{}.csv"'.format( request.url_vars.get("table", database)) headers["content-disposition"] = disposition return AsgiStream(stream_fn, headers=headers, content_type=content_type)
async def _data_traced( self, request, default_labels=False, _next=None, _size=None, ): database_route = tilde_decode(request.url_vars["database"]) table_name = tilde_decode(request.url_vars["table"]) try: db = self.ds.get_database(route=database_route) except KeyError: raise NotFound("Database not found: {}".format(database_route)) database_name = db.name # For performance profiling purposes, ?_noparallel=1 turns off asyncio.gather async def _gather_parallel(*args): return await asyncio.gather(*args) async def _gather_sequential(*args): results = [] for fn in args: results.append(await fn) return results gather = (_gather_sequential if request.args.get("_noparallel") else _gather_parallel) # If this is a canned query, not a table, then dispatch to QueryView instead canned_query = await self.ds.get_canned_query(database_name, table_name, request.actor) if canned_query: return await QueryView(self.ds).data( request, canned_query["sql"], metadata=canned_query, editable=False, canned_query=table_name, named_parameters=canned_query.get("params"), write=bool(canned_query.get("write")), ) is_view, table_exists = map( bool, await gather(db.get_view_definition(table_name), db.table_exists(table_name)), ) # If table or view not found, return 404 if not is_view and not table_exists: raise NotFound(f"Table not found: {table_name}") # Ensure user has permission to view this table await self.ds.ensure_permissions( request.actor, [ ("view-table", (database_name, table_name)), ("view-database", database_name), "view-instance", ], ) private = not await self.ds.permission_allowed( None, "view-table", (database_name, table_name), default=True) # Handle ?_filter_column and redirect, if present redirect_params = filters_should_redirect(request.args) if redirect_params: return self.redirect( request, path_with_added_args(request, redirect_params), forward_querystring=False, ) # If ?_sort_by_desc=on (from checkbox) redirect to _sort_desc=(_sort) if "_sort_by_desc" in request.args: return self.redirect( request, path_with_added_args( request, { "_sort_desc": request.args.get("_sort"), "_sort_by_desc": None, "_sort": None, }, ), forward_querystring=False, ) # Introspect columns and primary keys for table pks = await db.primary_keys(table_name) table_columns = await db.table_columns(table_name) # Take ?_col= and ?_nocol= into account specified_columns = await self.columns_to_select( table_columns, pks, request) select_specified_columns = ", ".join( escape_sqlite(t) for t in specified_columns) select_all_columns = ", ".join(escape_sqlite(t) for t in table_columns) # rowid tables (no specified primary key) need a different SELECT use_rowid = not pks and not is_view if use_rowid: select_specified_columns = f"rowid, {select_specified_columns}" select_all_columns = f"rowid, {select_all_columns}" order_by = "rowid" order_by_pks = "rowid" else: order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks]) order_by = order_by_pks if is_view: order_by = "" nocount = request.args.get("_nocount") nofacet = request.args.get("_nofacet") nosuggest = request.args.get("_nosuggest") if request.args.get("_shape") in ("array", "object"): nocount = True nofacet = True table_metadata = self.ds.table_metadata(database_name, table_name) units = table_metadata.get("units", {}) # Arguments that start with _ and don't contain a __ are # special - things like ?_search= - and should not be # treated as filters. filter_args = [] for key in request.args: if not (key.startswith("_") and "__" not in key): for v in request.args.getlist(key): filter_args.append((key, v)) # Build where clauses from query string arguments filters = Filters(sorted(filter_args), units, ureg) where_clauses, params = filters.build_where_clauses(table_name) # Execute filters_from_request plugin hooks - including the default # ones that live in datasette/filters.py extra_context_from_filters = {} extra_human_descriptions = [] for hook in pm.hook.filters_from_request( request=request, table=table_name, database=database_name, datasette=self.ds, ): filter_arguments = await await_me_maybe(hook) if filter_arguments: where_clauses.extend(filter_arguments.where_clauses) params.update(filter_arguments.params) extra_human_descriptions.extend( filter_arguments.human_descriptions) extra_context_from_filters.update( filter_arguments.extra_context) # Deal with custom sort orders sortable_columns = await self.sortable_columns_for_table( database_name, table_name, use_rowid) sort = request.args.get("_sort") sort_desc = request.args.get("_sort_desc") if not sort and not sort_desc: sort = table_metadata.get("sort") sort_desc = table_metadata.get("sort_desc") if sort and sort_desc: raise DatasetteError( "Cannot use _sort and _sort_desc at the same time") if sort: if sort not in sortable_columns: raise DatasetteError(f"Cannot sort table by {sort}") order_by = escape_sqlite(sort) if sort_desc: if sort_desc not in sortable_columns: raise DatasetteError(f"Cannot sort table by {sort_desc}") order_by = f"{escape_sqlite(sort_desc)} desc" from_sql = "from {table_name} {where}".format( table_name=escape_sqlite(table_name), where=("where {} ".format(" and ".join(where_clauses))) if where_clauses else "", ) # Copy of params so we can mutate them later: from_sql_params = dict(**params) count_sql = f"select count(*) {from_sql}" # Handle pagination driven by ?_next= _next = _next or request.args.get("_next") offset = "" if _next: sort_value = None if is_view: # _next is an offset offset = f" offset {int(_next)}" else: components = urlsafe_components(_next) # If a sort order is applied and there are multiple components, # the first of these is the sort value if (sort or sort_desc) and (len(components) > 1): sort_value = components[0] # Special case for if non-urlencoded first token was $null if _next.split(",")[0] == "$null": sort_value = None components = components[1:] # Figure out the SQL for next-based-on-primary-key first next_by_pk_clauses = [] if use_rowid: next_by_pk_clauses.append(f"rowid > :p{len(params)}") params[f"p{len(params)}"] = components[0] else: # Apply the tie-breaker based on primary keys if len(components) == len(pks): param_len = len(params) next_by_pk_clauses.append( compound_keys_after_sql(pks, param_len)) for i, pk_value in enumerate(components): params[f"p{param_len + i}"] = pk_value # Now add the sort SQL, which may incorporate next_by_pk_clauses if sort or sort_desc: if sort_value is None: if sort_desc: # Just items where column is null ordered by pk where_clauses.append( "({column} is null and {next_clauses})".format( column=escape_sqlite(sort_desc), next_clauses=" and ".join( next_by_pk_clauses), )) else: where_clauses.append( "({column} is not null or ({column} is null and {next_clauses}))" .format( column=escape_sqlite(sort), next_clauses=" and ".join( next_by_pk_clauses), )) else: where_clauses.append( "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))" .format( column=escape_sqlite(sort or sort_desc), op=">" if sort else "<", p=len(params), extra_desc_only="" if sort else " or {column2} is null".format( column2=escape_sqlite(sort or sort_desc)), next_clauses=" and ".join(next_by_pk_clauses), )) params[f"p{len(params)}"] = sort_value order_by = f"{order_by}, {order_by_pks}" else: where_clauses.extend(next_by_pk_clauses) where_clause = "" if where_clauses: where_clause = f"where {' and '.join(where_clauses)} " if order_by: order_by = f"order by {order_by}" extra_args = {} # Handle ?_size=500 page_size = _size or request.args.get("_size") or table_metadata.get( "size") if page_size: if page_size == "max": page_size = self.ds.max_returned_rows try: page_size = int(page_size) if page_size < 0: raise ValueError except ValueError: raise BadRequest("_size must be a positive integer") if page_size > self.ds.max_returned_rows: raise BadRequest( f"_size must be <= {self.ds.max_returned_rows}") extra_args["page_size"] = page_size else: page_size = self.ds.page_size # Facets are calculated against SQL without order by or limit sql_no_order_no_limit = ( "select {select_all_columns} from {table_name} {where}".format( select_all_columns=select_all_columns, table_name=escape_sqlite(table_name), where=where_clause, )) # This is the SQL that populates the main table on the page sql = "select {select_specified_columns} from {table_name} {where}{order_by} limit {page_size}{offset}".format( select_specified_columns=select_specified_columns, table_name=escape_sqlite(table_name), where=where_clause, order_by=order_by, page_size=page_size + 1, offset=offset, ) if request.args.get("_timelimit"): extra_args["custom_time_limit"] = int( request.args.get("_timelimit")) # Execute the main query! results = await db.execute(sql, params, truncate=True, **extra_args) # Calculate the total count for this query filtered_table_rows_count = None if (not db.is_mutable and self.ds.inspect_data and count_sql == f"select count(*) from {table_name} "): # We can use a previously cached table row count try: filtered_table_rows_count = self.ds.inspect_data[ database_name]["tables"][table_name]["count"] except KeyError: pass # Otherwise run a select count(*) ... if count_sql and filtered_table_rows_count is None and not nocount: try: count_rows = list(await db.execute(count_sql, from_sql_params)) filtered_table_rows_count = count_rows[0][0] except QueryInterrupted: pass # Faceting if not self.ds.setting("allow_facet") and any( arg.startswith("_facet") for arg in request.args): raise BadRequest("_facet= is not allowed") # pylint: disable=no-member facet_classes = list( itertools.chain.from_iterable(pm.hook.register_facet_classes())) facet_results = {} facets_timed_out = [] facet_instances = [] for klass in facet_classes: facet_instances.append( klass( self.ds, request, database_name, sql=sql_no_order_no_limit, params=params, table=table_name, metadata=table_metadata, row_count=filtered_table_rows_count, )) async def execute_facets(): if not nofacet: # Run them in parallel facet_awaitables = [ facet.facet_results() for facet in facet_instances ] facet_awaitable_results = await gather(*facet_awaitables) for ( instance_facet_results, instance_facets_timed_out, ) in facet_awaitable_results: for facet_info in instance_facet_results: base_key = facet_info["name"] key = base_key i = 1 while key in facet_results: i += 1 key = f"{base_key}_{i}" facet_results[key] = facet_info facets_timed_out.extend(instance_facets_timed_out) suggested_facets = [] async def execute_suggested_facets(): # Calculate suggested facets if (self.ds.setting("suggest_facets") and self.ds.setting("allow_facet") and not _next and not nofacet and not nosuggest): # Run them in parallel facet_suggest_awaitables = [ facet.suggest() for facet in facet_instances ] for suggest_result in await gather(*facet_suggest_awaitables): suggested_facets.extend(suggest_result) await gather(execute_facets(), execute_suggested_facets()) # Figure out columns and rows for the query columns = [r[0] for r in results.description] rows = list(results.rows) # Expand labeled columns if requested expanded_columns = [] expandable_columns = await self.expandable_columns( database_name, table_name) columns_to_expand = None try: all_labels = value_as_boolean(request.args.get("_labels", "")) except ValueError: all_labels = default_labels # Check for explicit _label= if "_label" in request.args: columns_to_expand = request.args.getlist("_label") if columns_to_expand is None and all_labels: # expand all columns with foreign keys columns_to_expand = [fk["column"] for fk, _ in expandable_columns] if columns_to_expand: expanded_labels = {} for fk, _ in expandable_columns: column = fk["column"] if column not in columns_to_expand: continue if column not in columns: continue expanded_columns.append(column) # Gather the values column_index = columns.index(column) values = [row[column_index] for row in rows] # Expand them expanded_labels.update(await self.ds.expand_foreign_keys( database_name, table_name, column, values)) if expanded_labels: # Rewrite the rows new_rows = [] for row in rows: new_row = CustomRow(columns) for column in row.keys(): value = row[column] if (column, value ) in expanded_labels and value is not None: new_row[column] = { "value": value, "label": expanded_labels[(column, value)], } else: new_row[column] = value new_rows.append(new_row) rows = new_rows # Pagination next link next_value = None next_url = None if 0 < page_size < len(rows): if is_view: next_value = int(_next or 0) + page_size else: next_value = path_from_row_pks(rows[-2], pks, use_rowid) # If there's a sort or sort_desc, add that value as a prefix if (sort or sort_desc) and not is_view: prefix = rows[-2][sort or sort_desc] if isinstance(prefix, dict) and "value" in prefix: prefix = prefix["value"] if prefix is None: prefix = "$null" else: prefix = tilde_encode(str(prefix)) next_value = f"{prefix},{next_value}" added_args = {"_next": next_value} if sort: added_args["_sort"] = sort else: added_args["_sort_desc"] = sort_desc else: added_args = {"_next": next_value} next_url = self.ds.absolute_url( request, self.ds.urls.path(path_with_replaced_args(request, added_args))) rows = rows[:page_size] # human_description_en combines filters AND search, if provided human_description_en = filters.human_description_en( extra=extra_human_descriptions) if sort or sort_desc: sorted_by = "sorted by {}{}".format( (sort or sort_desc), " descending" if sort_desc else "") human_description_en = " ".join( [b for b in [human_description_en, sorted_by] if b]) async def extra_template(): nonlocal sort display_columns, display_rows = await display_columns_and_rows( self.ds, database_name, table_name, results.description, rows, link_column=not is_view, truncate_cells=self.ds.setting("truncate_cells_html"), sortable_columns=await self.sortable_columns_for_table(database_name, table_name, use_rowid=True), ) metadata = ((self.ds.metadata("databases") or {}).get(database_name, {}).get("tables", {}).get(table_name, {})) self.ds.update_with_inherited_metadata(metadata) form_hidden_args = [] for key in request.args: if (key.startswith("_") and key not in ("_sort", "_search", "_next") and "__" not in key): for value in request.args.getlist(key): form_hidden_args.append((key, value)) # if no sort specified AND table has a single primary key, # set sort to that so arrow is displayed if not sort and not sort_desc: if 1 == len(pks): sort = pks[0] elif use_rowid: sort = "rowid" async def table_actions(): links = [] for hook in pm.hook.table_actions( datasette=self.ds, table=table_name, database=database_name, actor=request.actor, request=request, ): extra_links = await await_me_maybe(hook) if extra_links: links.extend(extra_links) return links # filter_columns combine the columns we know are available # in the table with any additional columns (such as rowid) # which are available in the query filter_columns = list(columns) + [ table_column for table_column in table_columns if table_column not in columns ] d = { "table_actions": table_actions, "use_rowid": use_rowid, "filters": filters, "display_columns": display_columns, "filter_columns": filter_columns, "display_rows": display_rows, "facets_timed_out": facets_timed_out, "sorted_facet_results": sorted( facet_results.values(), key=lambda f: (len(f["results"]), f["name"]), reverse=True, ), "form_hidden_args": form_hidden_args, "is_sortable": any(c["sortable"] for c in display_columns), "fix_path": self.ds.urls.path, "path_with_replaced_args": path_with_replaced_args, "path_with_removed_args": path_with_removed_args, "append_querystring": append_querystring, "request": request, "sort": sort, "sort_desc": sort_desc, "disable_sort": is_view, "custom_table_templates": [ f"_table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", f"_table-table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", "_table.html", ], "metadata": metadata, "view_definition": await db.get_view_definition(table_name), "table_definition": await db.get_table_definition(table_name), "datasette_allow_facet": "true" if self.ds.setting("allow_facet") else "false", } d.update(extra_context_from_filters) return d return ( { "database": database_name, "table": table_name, "is_view": is_view, "human_description_en": human_description_en, "rows": rows[:page_size], "truncated": results.truncated, "filtered_table_rows_count": filtered_table_rows_count, "expanded_columns": expanded_columns, "expandable_columns": expandable_columns, "columns": columns, "primary_keys": pks, "units": units, "query": { "sql": sql, "params": params }, "facet_results": facet_results, "suggested_facets": suggested_facets, "next": next_value and str(next_value) or None, "next_url": next_url, "private": private, "allow_execute_sql": await self.ds.permission_allowed(request.actor, "execute-sql", database_name, default=True), }, extra_template, ( f"table-{to_css_class(database_name)}-{to_css_class(table_name)}.html", "table.html", ), )
async def inner(): where_clauses = [] params = {} human_descriptions = [] extra_context = {} # Figure out which fts_table to use table_metadata = datasette.table_metadata(database, table) db = datasette.get_database(database) fts_table = request.args.get("_fts_table") fts_table = fts_table or table_metadata.get("fts_table") fts_table = fts_table or await db.fts_table(table) fts_pk = request.args.get("_fts_pk", table_metadata.get("fts_pk", "rowid")) search_args = { key: request.args[key] for key in request.args if key.startswith("_search") and key != "_searchmode" } search = "" search_mode_raw = table_metadata.get("searchmode") == "raw" # Or set search mode from the querystring qs_searchmode = request.args.get("_searchmode") if qs_searchmode == "escaped": search_mode_raw = False if qs_searchmode == "raw": search_mode_raw = True extra_context["supports_search"] = bool(fts_table) if fts_table and search_args: if "_search" in search_args: # Simple ?_search=xxx search = search_args["_search"] where_clauses.append( "{fts_pk} in (select rowid from {fts_table} where {fts_table} match {match_clause})" .format( fts_table=escape_sqlite(fts_table), fts_pk=escape_sqlite(fts_pk), match_clause=":search" if search_mode_raw else "escape_fts(:search)", )) human_descriptions.append(f'search matches "{search}"') params["search"] = search extra_context["search"] = search else: # More complex: search against specific columns for i, (key, search_text) in enumerate(search_args.items()): search_col = key.split("_search_", 1)[1] if search_col not in await db.table_columns(fts_table): raise BadRequest("Cannot search by that column") where_clauses.append( "rowid in (select rowid from {fts_table} where {search_col} match {match_clause})" .format( fts_table=escape_sqlite(fts_table), search_col=escape_sqlite(search_col), match_clause=":search_{}".format(i) if search_mode_raw else "escape_fts(:search_{})".format(i), )) human_descriptions.append( f'search column "{search_col}" matches "{search_text}"' ) params[f"search_{i}"] = search_text extra_context["search"] = search_text return FilterArguments(where_clauses, params, human_descriptions, extra_context)