def compile_when_clause(element: _WhenClause[_Ops], compiler: SQLCompiler, **kwargs): text = element.when_type() if element.condition is not None: text += " AND {}".format(compiler.process(element.condition, **kwargs)) # The when_clause specs are ever so slightly different from the classic UPDATE/INSERT/DELETE clauses, so this sucks if isinstance(element.action, Delete): action_text = "DELETE" # this one's alright though elif isinstance(element.action, Update): action_text = compiler.process(element.action, **kwargs) # remove the `<table>` from `UPDATE <table> SET` action_text = action_text.replace( compiler.process(element.action.table, asfrom=True), "", 1) elif isinstance(element.action, Insert): if not (element.action._values or element.action._ordered_values): # type: ignore action_text = "INSERT ROW" else: action_text = compiler.process(element.action, **kwargs) # remove the `INTO <table>` from `INSERT INTO <table> (...) VALUES`, handling the potential aliasing action_text = action_text.replace( f"INTO `{element.action.table.name}` ", "", 1) else: _: NoReturn = element.action raise AssertionError(f"Invalid value: {element.action !r}") text += " THEN \n\t{}\n".format(action_text) return text
def compile_extract_bigquery(element: Extract, compiler: SQLCompiler, **kw): return "extract(%s FROM %s)" % ( compiler.process( literal_column('DAYOFWEEK' if element.part == 'DOW' else element.part), **kw), compiler.process(element.expr, **kw), )
def compile_timestamp_diff_bigquery(element: TimestampDiff, compiler: SQLCompiler, **kw): return "timestampdiff(%s, %s, %s)" % ( compiler.process(element.start, **kw), compiler.process(element.end, **kw), compiler.process(literal(element.part, String()), **kw), )
def compile_parse_date_bigquery(element: ParseDate, compiler: SQLCompiler, **kw): if element.format is not None: return "parse_date(%s, %s)" % (compiler.process( element.format, **kw), compiler.process(element.expr, **kw)) else: return compiler.process(cast(element.expr, Date()), **kw)
def compile_timestamp_diff_snowflake(element: TimestampDiff, compiler: SQLCompiler, **kw): return "timestampdiff(%s, %s, %s)" % ( compiler.process(literal_column(element.part, String()), **kw), compiler.process(element.start, **kw), compiler.process(element.end, **kw), )
def _create_sqlite_query(self, values: list, if_row_exists: str) -> str: def escape_col(col): # unbound column from its table # otherwise the column would compile as "table.col_name" # which we could not use in e.g. SQlite unbound_col = deepcopy(col) unbound_col.table = None return str(unbound_col.compile(dialect=self.connection.dialect)) # prepare start of upsert (INSERT VALUES (...) ON CONFLICT) upsert = SQLCompiler(dialect=self.connection.dialect, statement=self.table.insert().values(values)) # append on conflict clause pk = [escape_col(c) for c in self.table.primary_key] non_pks = [ escape_col(c) for c in self.table.columns if c not in list(self.table.primary_key) ] ondup = f'ON CONFLICT ({",".join(pk)})' # always use "DO NOTHING" if there are no primary keys if (not non_pks) or (if_row_exists == 'ignore'): ondup_action = 'DO NOTHING' upsert.string = ' '.join((upsert.string, ondup, ondup_action)) elif if_row_exists == 'update': ondup_action = 'DO UPDATE SET' updates = ', '.join(f'{c}=EXCLUDED.{c}' for c in non_pks) upsert.string = ' '.join( (upsert.string, ondup, ondup_action, updates)) return upsert
def compile_parse_date_snowflake(element: ParseDate, compiler: SQLCompiler, **kw): if element.format is not None: return "to_date(%s, %s)" % (compiler.process( element.expr, **kw), compiler.process(element.format, **kw)) else: return "to_date(%s)" % compiler.process(element.expr, **kw)
def compile_split_part_bigquery(element: SplitPart, compiler: SQLCompiler, **kw): return "split(%s, %s)[safe_ordinal(%s)]" % ( compiler.process(element.expr, **kw), compiler.process(element.delimiter, **kw), compiler.process(element.position, **kw), )
def compile_split_part_snowflake(element: SplitPart, compiler: SQLCompiler, **kw): return "split_part(%s, %s, %s)" % ( compiler.process(element.expr, **kw), compiler.process(element.delimiter, **kw), compiler.process(element.position, **kw), )
def compile_merge_into(element: MergeInto, compiler: SQLCompiler, **kwargs): base_template = dedent("""\ MERGE INTO {target} USING {source} ON {cond} """) # Compile the INSERT/UPDATE/DELETE parts first. # This is because CTEs in the `source` value (and elsewhere, but there really shouldn't # ever be a CTE in `target` and/or `cond`) would appear in the INSERT/UPDATE (SQLAlchemy # sees a CTE in the context, so it pushes it to the top of the INSERT/UPDATE part). # This feels pretty hackish, but it works well for now. # Another solution might be to process the `when_clauses` in a copy() of the compiler # with the .ctes emptied, ie in a "blank" state. actions = [] for when_clause in element.when_clauses: actions.append(compiler.process(when_clause, **kwargs)) query = base_template.format( target=compiler.process(element.target, asfrom=True, **kwargs), source=compiler.process(element.source, asfrom=True, **kwargs), cond=compiler.process(element.onclause, **kwargs), ) query += "".join(actions) # deactivate all "fetch PK" or "implicit-returning" features # XXX should we set isdelete = False too? compiler.isinsert = compiler.isupdate = False return dedent(query)
def compile_convert_timezone_bigquery(element: ConvertTimezone, compiler: SQLCompiler, **kw): if element.tz_to is not None: return "datetime(datetime(%s, %s), %s)" % ( compiler.process(element.expr, **kw), compiler.process(element.tz_from, **kw), compiler.process(element.tz_to, **kw), ) else: return "datetime(%s, %s)" % (compiler.process( element.expr, **kw), compiler.process(element.tz_from, **kw))
def on_message(self, message): command = message.split('|')[0] query = '|'.join(message.split('|')[1:]) if command == 'criterion': criterion = query.split('|')[0] value = '|'.join(query.split('|')[1:]) if criterion == 'date': try: value = datetime.strptime( value.replace('+', ' '), '%Y-%m-%d %H:%M:%S') except ValueError: try: value = datetime.strptime('%Y-%m-%d') except ValueError: value = datetime.now() filter_ = between(Visit.date, value.date(), value.date() + timedelta(days=1)) elif criterion in ( 'referrer', 'asn', 'browser_name', 'site', 'browser_version', 'browser_name_version', 'query'): filter_ = getattr(Visit, criterion).ilike('%%%s%%' % value) else: filter_ = func.lower( getattr(Visit, criterion)) == value.lower() query = (self.db .query(Visit) .filter(filter_)) dialect = query.session.bind.dialect compiler = SQLCompiler(dialect, query.statement) compiler.compile() self.count = 0 self.stop = 20 self.state = 'start' self.execute(compiler.string, compiler.params) elif command == 'more': if self.state == 'paused': self.stop += 20 self.state = 'executing' self.cursor.execute( 'FETCH FORWARD 1 FROM visit_cur;') elif command == '/status': for i, conn in enumerate(adb._pool): if conn.busy(): self.write_message( 'INFO|Connection %d is busy: ' 'Executing? %s Closed? %d Status? %s (%d)' % ( i, conn.connection.isexecuting(), conn.connection.closed, conn.connection.get_transaction_status(), conn.connection.get_backend_pid())) else: self.write_message('INFO|Connection %d is free' % i)
def compile_element(element, dialect): #statement = query.statement comp = SQLCompiler(dialect, element) comp.compile() enc = dialect.encoding params = {} for k,v in comp.params.iteritems(): if isinstance(v, unicode): v = v.encode(enc) params[k] = sqlescape(v) return (comp.string.encode(enc) % params).decode(enc)
def compile_convert_timezone_snowflake(element: ConvertTimezone, compiler: SQLCompiler, **kw): if element.tz_to is not None: return "convert_timezone(%s, %s, %s)" % ( compiler.process(element.tz_from, **kw), compiler.process(element.tz_to, **kw), compiler.process(element.expr, **kw), ) else: return "convert_timezone(%s, %s)" % ( compiler.process(element.tz_from, **kw), compiler.process(element.expr, **kw), )
def compile_insert_on_duplicate_key_update(insert: Insert, compiler: SQLCompiler, **kw) -> str: """ We can't get the fieldnames directly from 'insert' or 'compiler'. We could rewrite the innards of the visit_insert statement, like https://github.com/bedwards/sqlalchemy_mysql_ext/blob/master/duplicate.py # noqa ... but, like that, it will get outdated. We could use a hack-in-by-hand method, like http://stackoverflow.com/questions/6611563/sqlalchemy-on-duplicate-key-update ... but a little automation would be nice. So, regex to the rescue. NOTE THAT COLUMNS ARE ALREADY QUOTED by this stage; no need to repeat. """ # log.critical(compiler.__dict__) # log.critical(compiler.dialect.__dict__) # log.critical(insert.__dict__) s = compiler.visit_insert(insert, **kw) # log.critical(s) m = RE_INSERT_FIELDNAMES.match(s) if m is None: raise ValueError("compile_insert_on_duplicate_key_update: no match") columns = [c.strip() for c in m.group('columns').split(",")] # log.critical(columns) updates = ", ".join( ["{c} = VALUES({c})".format(c=c) for c in columns]) s += ' ON DUPLICATE KEY UPDATE {}'.format(updates) log.critical(s) return s
def compile_insert_on_duplicate_key_update(insert: Insert, compiler: SQLCompiler, **kw) -> str: """ Hooks into the use of the :class:`InsertOnDuplicate` class for the MySQL dialect. Compiles the relevant SQL for an ``INSERT... ON DUPLICATE KEY UPDATE`` statement. Notes: - We can't get the fieldnames directly from ``insert`` or ``compiler``. - We could rewrite the innards of the visit_insert statement (https://github.com/bedwards/sqlalchemy_mysql_ext/blob/master/duplicate.py)... but, like that, it will get outdated. - We could use a hack-in-by-hand method (http://stackoverflow.com/questions/6611563/sqlalchemy-on-duplicate-key-update) ... but a little automation would be nice. - So, regex to the rescue. - NOTE THAT COLUMNS ARE ALREADY QUOTED by this stage; no need to repeat. """ # noqa # log.critical(compiler.__dict__) # log.critical(compiler.dialect.__dict__) # log.critical(insert.__dict__) s = compiler.visit_insert(insert, **kw) # log.critical(s) m = RE_INSERT_FIELDNAMES.match(s) if m is None: raise ValueError("compile_insert_on_duplicate_key_update: no match") columns = [c.strip() for c in m.group('columns').split(",")] # log.critical(columns) updates = ", ".join([f"{c} = VALUES({c})" for c in columns]) s += f' ON DUPLICATE KEY UPDATE {updates}' # log.critical(s) return s
def _append_string(insert: Insert, compiler: SQLCompiler, **kwargs: Any) -> str: """append a string to insert""" append_string: str = compiler.visit_insert(insert, **kwargs) if insert.kwargs['postgresql_append_string']: if append_string.rfind("RETURNING") == -1: return "%s %s" % (append_string, insert.kwargs['postgresql_append_string']) return append_string.replace( "RETURNING", " " + insert.kwargs['postgresql_append_string'] + " RETURNING ") return append_string
def pg(element: function, compiler: SQLCompiler, **kw: Dict[str, Any]) -> str: args = iter(element.clauses) # type: ignore json_field = next(args) # type: ignore type_bind_param = next(args) # type: ignore type_: TypeEngine = type_bind_param.value # type: ignore assert isinstance(type_, TypeEngine) or issubclass(type_, TypeEngine) select_from = sqla_func.jsonb_array_elements(cast( json_field, JSONB)).table_valued("value") statement = select([sqla_func.array_agg(cast(select_from.column, type_))]) return compiler.process(statement, **kw)
def compile_nullif(element: NullIf, compiler: SQLCompiler, **kw): return "nullif(%s)" % compiler.process(element.clauses, **kw)
def sqlite_upsert(engine, connection, table, values, if_row_exists): """ Compiles and executes a SQlite ON CONFLICT...DO NOTHING or DO UPDATE statement. Parameters ---------- engine : sqlalchemy.engine.base.Engine connection : sqlalchemy.engine.base.Connection table : sqlalchemy.sql.schema.Table values : list of dict if_row_exists : {'update', 'ignore'} * If 'update' issues a ON CONFLICT...DO UPDATE statement * If 'ignore' issues a ON CONFLICT...DO NOTHING statement Examples -------- >>> import datetime >>> from sqlalchemy import create_engine >>> from pangres.examples import _TestsExampleTable >>> from pangres.helpers import PandasSpecialEngine >>> >>> engine = create_engine('sqlite:///:memory:') >>> df = _TestsExampleTable.create_example_df(nb_rows=5) >>> df # doctest: +SKIP | profileid | email | timestamp | size_in_meters | likes_pizza | favorite_colors | |:------------|:------------------|:--------------------------|-----------------:|:--------------|:-----------------------------| | abc0 | [email protected] | 2007-10-11 23:15:06+00:00 | 1.93994 | False | ['yellow', 'blue'] | | abc1 | [email protected] | 2007-11-21 07:18:20+00:00 | 1.98637 | True | ['blue', 'pink'] | | abc2 | [email protected] | 2002-09-30 17:55:09+00:00 | 1.55945 | True | ['blue'] | | abc3 | [email protected] | 2007-06-13 22:08:36+00:00 | 2.2495 | True | ['orange', 'blue'] | | abc4 | [email protected] | 2004-11-22 04:54:09+00:00 | 2.2019 | False | ['orange', 'yellow', 'blue'] | >>> pse = PandasSpecialEngine(engine=engine, df=df, table_name='test_upsert_sqlite') >>> >>> insert_values = {'profileid':'abc5', 'email': '*****@*****.**', ... 'timestamp': datetime.datetime(2019, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), ... 'size_in_meters':1.9, ... 'likes_pizza':True, ... 'favorite_colors':['red', 'pink']} >>> >>> sqlite_upsert(engine=engine, connection=engine.connect(), table=pse.table, ... values=list(insert_values.values()), if_row_exists='update') # doctest: +SKIP """ def escape_col(col): # unbound column from its table # otherwise the column would compile as "table.col_name" # which we could not use in e.g. SQlite unbound_col = deepcopy(col) unbound_col.table = None return str(unbound_col.compile(dialect=engine.dialect)) # prepare start of insert (INSERT VALUES (...) ON CONFLICT) pk = [escape_col(c) for c in table.primary_key] insert = SQLCompiler(dialect=engine.dialect, statement=table.insert().values(values)) # append on conflict clause pk = [escape_col(c) for c in table.primary_key] ondup = f'ON CONFLICT ({",".join(pk)})' if if_row_exists == 'ignore': ondup_action = 'DO NOTHING' insert.string = ' '.join((insert.string, ondup, ondup_action)) elif if_row_exists == 'update': ondup_action = 'DO UPDATE SET' non_pks = [ escape_col(c) for c in table.columns if c not in list(table.primary_key) ] updates = ', '.join(f'{c}=EXCLUDED.{c}' for c in non_pks) insert.string = ' '.join((insert.string, ondup, ondup_action, updates)) connection.execute(insert)
def compile_date_trunk_snowflake(element: DateTrunc, compiler: SQLCompiler, **kw): return "date_trunc(%s, %s)" % (compiler.process(literal( element.part), **kw), compiler.process(element.expr, **kw))
def compile_date_trunk_bigquery(element: DateTrunc, compiler: SQLCompiler, **kw): return "datetime_trunc(%s, %s)" % ( compiler.process(element.expr, **kw), compiler.process(literal_column(element.bigquery_part), **kw), )
def assert_ignore_clause(self, scheme, expected): dialect = SA.create_engine(scheme + ':///').dialect compiler = SQLCompiler(dialect=dialect, statement=self.statement) T.assert_equal(str(compiler), expected)
def mysql(element: agg, compiler: SQLCompiler, **kw: Dict[str, Any]) -> str: return "json_arrayagg(%s)" % compiler.process(element.clauses, **kw)
def sqlite(element: agg, compiler: SQLCompiler, **kw: Dict[str, Any]) -> str: return "json_group_array(%s)" % compiler.process(element.clauses, **kw)
def compile_parentheses(element: Parentheses, compiler: SQLCompiler, **kw): return "(%s)" % compiler.process(element.element, **kw)
def compile_extract_snowflake(element: Extract, compiler: SQLCompiler, **kw): return "date_part('%s', %s)" % ( compiler.process(literal_column(element.part), **kw), compiler.process(element.expr, **kw), )