def __string_col(self, col: str) -> str:
    """
    Return an expression yielding `col` as a string: the escaped column
    itself when its schema type is already a (non-Fixed) String, otherwise
    the column wrapped in toString().
    """
    raw_type = self.__columns.get(col, None)
    type_name = str(raw_type) if raw_type else None
    # FixedString must still be converted: substring check alone would
    # wrongly match it, hence the explicit exclusion.
    already_string = (
        type_name
        and 'String' in type_name
        and 'FixedString' not in type_name
    )
    if already_string:
        return escape_col(col)
    return 'toString({})'.format(escape_col(col))
def __init__(self, base_name, name, type):
    """
    Build a column descriptor.

    Stores the raw parts plus two derived forms: `flattened` is the
    dotted "base.name" path (or just `name` when there is no base), and
    `escaped` is the flattened name made safe for use in SQL.
    """
    self.base_name = base_name
    self.name = name
    self.type = type
    if base_name:
        self.flattened = '{}.{}'.format(base_name, name)
    else:
        self.flattened = name
    self.escaped = escape_col(self.flattened)
def column_expr(self, column_name, query: Query, parsing_context: ParsingContext, table_alias: str = ""):
    """
    Return an expression for the column name. Handle special column aliases
    that evaluate to something else.
    """
    qualified = qualified_column(column_name, table_alias)
    return escape_col(qualified)
def test_escape_col(self):
    """escape_col leaves safe identifiers alone and backtick-quotes the rest."""
    assert escape_col(None) is None
    # Plain and dotted identifiers pass through unchanged.
    for passthrough in ('', 'foo', 'foo.bar'):
        assert escape_col(passthrough) == passthrough
    assert escape_col('foo:bar') == '`foo:bar`'
    # Even though backtick characters in columns should be
    # disallowed by the query schema, make sure we dont allow
    # injection anyway.
    assert escape_col("`") == r"`\``"
    assert escape_col("production`; --") == r"`production\`; --`"
def column_expr(self, column_name, body):
    """
    Return an expression for the column name. Handle special column aliases
    that evaluate to something else.
    """
    escaped = escape_col(column_name)
    return escaped
def for_schema(self):
    """Render this column as a "<name> <type>" fragment for a DDL statement."""
    escaped_name = escape_col(self.name)
    rendered_type = self.type.for_schema()
    return '{} {}'.format(escaped_name, rendered_type)
def process_delete_tag(message, dataset) -> Optional[Replacement]:
    """
    Build a Replacement that removes a tag from all matching events.

    Reads the tag name and cutoff datetime from `message`, then constructs
    a count query and an INSERT ... SELECT rewrite that re-emits each
    affected row with the tag stripped from the `tags.key`/`tags.value`
    arrays (and the promoted tag column nulled out, when the tag is
    promoted). Returns None when the message carries an empty tag.
    """
    tag = message['tag']
    if not tag:
        return None

    assert isinstance(tag, str)
    # NOTE(review): assumes message['datetime'] matches
    # settings.PAYLOAD_DATETIME_FORMAT — strptime raises otherwise.
    timestamp = datetime.strptime(message['datetime'], settings.PAYLOAD_DATETIME_FORMAT)
    # Promoted tags live in their own column; fall back to the tag name
    # itself when no mapping exists.
    tag_column_name = dataset.get_tag_column_map()['tags'].get(tag, tag)
    is_promoted = tag in dataset.get_promoted_tags()['tags']

    where = """\
        WHERE project_id = %(project_id)s
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    # Promoted tags can be matched on their dedicated column; otherwise we
    # must search the tags.key array.
    if is_promoted:
        where += "AND %(tag_column)s IS NOT NULL"
    else:
        where += "AND has(`tags.key`, %(tag_str)s)"

    insert_query_template = """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """ + where

    select_columns = []
    all_columns = dataset.get_dataset_schemas().get_read_schema().get_columns()
    for col in all_columns:
        if is_promoted and col.flattened == tag_column_name:
            # Null out the promoted column for the deleted tag.
            select_columns.append('NULL')
        elif col.flattened == 'tags.key':
            # Drop the deleted tag's key from the parallel key array.
            select_columns.append(
                "arrayFilter(x -> (indexOf(`tags.key`, x) != indexOf(`tags.key`, %s)), `tags.key`)" % escape_string(tag)
            )
        elif col.flattened == 'tags.value':
            # Drop the value at the same position so key/value arrays stay aligned.
            select_columns.append(
                "arrayMap(x -> arrayElement(`tags.value`, x), arrayFilter(x -> x != indexOf(`tags.key`, %s), arrayEnumerate(`tags.value`)))" % escape_string(tag)
            )
        else:
            # Every other column is copied through unchanged.
            select_columns.append(col.escaped)

    all_column_names = [col.escaped for col in all_columns]
    query_args = {
        'all_columns': ', '.join(all_column_names),
        'select_columns': ', '.join(select_columns),
        'project_id': message['project_id'],
        'tag_str': escape_string(tag),
        'tag_column': escape_col(tag_column_name),
        'timestamp': timestamp.strftime(DATETIME_FORMAT),
    }

    count_query_template = """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """ + where

    # FINAL is required above, so flag the query accordingly.
    query_time_flags = (NEEDS_FINAL, message['project_id'])

    return Replacement(count_query_template, insert_query_template, query_args, query_time_flags)