def get_dialect_like_pattern_expression(column, dialect, like_pattern, positive=True):
    """Return a LIKE (or NOT LIKE) expression for ``column`` if the dialect supports it.

    :param column: SQLAlchemy column object to match against.
    :param dialect: dialect module/object of the active engine.
    :param like_pattern: SQL ``LIKE`` pattern string.
    :param positive: when False, negate the match (``NOT LIKE``).
    :return: a SQLAlchemy expression, or ``None`` when the dialect is unsupported.
    """
    dialect_supported: bool = False

    try:
        # Bigquery
        if hasattr(dialect, "BigQueryDialect"):
            dialect_supported = True
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    # Core dialects shipped with SQLAlchemy itself.
    # BUGFIX: guarded like every other probe in this function — previously an
    # object without a ``dialect`` attribute (or with ``dialect`` set to None)
    # raised here instead of falling through to ``return None``.
    try:
        if issubclass(
            dialect.dialect,
            (
                sa.dialects.sqlite.dialect,
                sa.dialects.postgresql.dialect,
                sa.dialects.mysql.dialect,
                sa.dialects.mssql.dialect,
            ),
        ):
            dialect_supported = True
    except (AttributeError, TypeError):
        pass

    try:
        # Redshift
        # noinspection PyUnresolvedReferences
        if isinstance(dialect, sqlalchemy_redshift.dialect.RedshiftDialect):
            dialect_supported = True
    except (AttributeError, TypeError):
        pass

    try:
        # Dremio
        if hasattr(dialect, "DremioDialect"):
            dialect_supported = True
    except (AttributeError, TypeError):
        pass

    try:
        # Teradata
        if issubclass(dialect.dialect, teradatasqlalchemy.dialect.TeradataDialect):
            dialect_supported = True
    except (AttributeError, TypeError):
        pass

    if dialect_supported:
        try:
            if positive:
                return column.like(literal(like_pattern))
            else:
                return sa.not_(column.like(literal(like_pattern)))
        except AttributeError:
            pass

    return None
def get_changed_users(session, last_update):
    """Return (user id, document type) rows for users modified since ``last_update``.

    The user profile document must be refreshed when the user name changes.
    """
    changed_users = (
        session.query(User.id, literal(USERPROFILE_TYPE))
        .filter(User.last_modified >= last_update)
    )
    return changed_users.all()
def get_changed_outings_ro_uo(session, last_update):
    """Return outings affected by changed outing-route or outing-user associations.

    E.g. when an association between outing O1 and route R1 is created,
    outing O1 has to be updated so that all waypoints associated to R1 are
    listed under `associated_waypoints_ids`, and so that R1 is listed
    under `associated_routes_ids`.
    """
    # Outing is the child of either a route or a user profile.
    route_to_outing = and_(
        AssociationLog.parent_document_type == ROUTE_TYPE,
        AssociationLog.child_document_type == OUTING_TYPE
    )
    user_to_outing = and_(
        AssociationLog.parent_document_type == USERPROFILE_TYPE,
        AssociationLog.child_document_type == OUTING_TYPE
    )

    changed_outings = (
        session.query(
            AssociationLog.child_document_id.label('outing_id'),
            literal(OUTING_TYPE).label('type'))
        .filter(or_(route_to_outing, user_to_outing))
        .filter(AssociationLog.written_at >= last_update)
        .group_by('outing_id', 'type')
    )
    return changed_outings.all()
def visit_set_variables_mysql(element, compiler, **kw):
    """Compile a MySQL ``SET`` statement assigning one user variable per entry.

    Non-clause values are wrapped as bind literals before compilation.
    """
    assignments = []
    for var_name, expr in element.values.items():
        if not isinstance(expr, ClauseElement):
            expr = literal(expr)
        compiled_value = compiler.process(expr)
        assignments.append('@%s := %s' % (var_name, compiled_value))
    return 'SET ' + ', '.join(assignments)
def get_changed_routes_wr(session, last_update):
    """Return routes affected by changed waypoint-route associations.

    E.g. when an association between waypoint W1 and route R1 is created,
    route R1 has to be updated so that W1 is listed under
    `associated_waypoints_ids`.
    """
    waypoint_to_route = and_(
        AssociationLog.parent_document_type == WAYPOINT_TYPE,
        AssociationLog.child_document_type == ROUTE_TYPE
    )

    changed_routes = (
        session.query(AssociationLog.child_document_id, literal(ROUTE_TYPE))
        .filter(waypoint_to_route)
        .filter(AssociationLog.written_at >= last_update)
    )
    return changed_routes.all()
def get_dialect_regex_expression(column, regex, dialect, positive=True):
    """Build a dialect-specific regex-match expression for ``column``.

    :param column: SQLAlchemy column object to match against.
    :param regex: regular expression pattern string.
    :param dialect: dialect module/object of the active engine.
    :param positive: when False, negate the match.
    :return: a SQLAlchemy expression, or ``None`` when the dialect is unsupported.
    """

    def _custom_match(match_op, no_match_op):
        # "column <op> regex" using the dialect's custom operator; the
        # negated operator is chosen when positive is False.
        operator = match_op if positive else no_match_op
        return BinaryExpression(column, literal(regex), custom_op(operator))

    try:
        # postgres
        if issubclass(dialect.dialect, sa.dialects.postgresql.dialect):
            return _custom_match("~", "!~")
    except AttributeError:
        pass

    try:
        # redshift
        if issubclass(dialect.dialect, sqlalchemy_redshift.dialect.RedshiftDialect):
            return _custom_match("~", "!~")
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    try:
        # MySQL
        if issubclass(dialect.dialect, sa.dialects.mysql.dialect):
            return _custom_match("REGEXP", "NOT REGEXP")
    except AttributeError:
        pass

    try:
        # Snowflake
        if issubclass(
            dialect.dialect,
            snowflake.sqlalchemy.snowdialect.SnowflakeDialect,
        ):
            return _custom_match("RLIKE", "NOT RLIKE")
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    try:
        # Bigquery
        if issubclass(dialect.dialect, pybigquery.sqlalchemy_bigquery.BigQueryDialect):
            contains = sa.func.REGEXP_CONTAINS(column, literal(regex))
            return contains if positive else sa.not_(contains)
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    return None
def visit_set_variable(element, compiler, **kw):
    """Compile a single ``SET name = value`` statement.

    A non-clause value is wrapped as a bind literal before compilation.
    """
    raw_value = element.value
    if isinstance(raw_value, ClauseElement):
        compiled_value = compiler.process(raw_value)
    else:
        compiled_value = compiler.process(literal(raw_value))
    return 'SET %s = %s' % (element.name, compiled_value)
def get_dialect_regex_expression(column, regex, dialect, positive=True):
    """Build a dialect-specific regex-match expression for ``column``.

    :param column: SQLAlchemy column object to match against.
    :param regex: regular expression pattern string.
    :param dialect: dialect module/object of the active engine.
    :param positive: when False, negate the match.
    :return: a SQLAlchemy expression, or ``None`` when the dialect is unsupported.
    """

    def _custom_match(match_op, no_match_op):
        # "column <op> regex" using the dialect's custom operator; the
        # negated operator is chosen when positive is False.
        operator = match_op if positive else no_match_op
        return BinaryExpression(column, literal(regex), custom_op(operator))

    try:
        # postgres
        if issubclass(dialect.dialect, sa.dialects.postgresql.dialect):
            return _custom_match("~", "!~")
    except AttributeError:
        pass

    try:
        # redshift
        # noinspection PyUnresolvedReferences
        if issubclass(dialect.dialect, sqlalchemy_redshift.dialect.RedshiftDialect):
            return _custom_match("~", "!~")
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    try:
        # MySQL
        if issubclass(dialect.dialect, sa.dialects.mysql.dialect):
            return _custom_match("REGEXP", "NOT REGEXP")
    except AttributeError:
        pass

    try:
        # Snowflake
        if issubclass(
            dialect.dialect,
            snowflake.sqlalchemy.snowdialect.SnowflakeDialect,
        ):
            return _custom_match("RLIKE", "NOT RLIKE")
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    try:
        # Bigquery
        if hasattr(dialect, "BigQueryDialect"):
            contains = sa.func.REGEXP_CONTAINS(column, literal(regex))
            return contains if positive else sa.not_(contains)
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        logger.debug(
            "Unable to load BigQueryDialect dialect while running get_dialect_regex_expression in expectations.metrics.util",
            exc_info=True,
        )
        pass

    try:
        # Dremio
        if hasattr(dialect, "DremioDialect"):
            matches = sa.func.REGEXP_MATCHES(column, literal(regex))
            return matches if positive else sa.not_(matches)
    except (
        AttributeError,
        TypeError,
    ):  # TypeError can occur if the driver was not installed and so is None
        pass

    try:
        # Teradata: REGEXP_SIMILAR returns 1/0 rather than a boolean,
        # so compare explicitly against the expected flag.
        if issubclass(dialect.dialect, teradatasqlalchemy.dialect.TeradataDialect):
            similar = sa.func.REGEXP_SIMILAR(column, literal(regex), literal("i"))
            return (similar == 1) if positive else (similar == 0)
    except (AttributeError, TypeError):
        pass

    return None
def get_changed_routes_and_outings_ww(session, last_update):
    """
    Returns the routes and outings when associations between waypoint and
    waypoint have been created/removed.

    E.g. when an association between waypoint W1 and W2 is created,
    all routes associated to W2, all routes associated to the direct
    children of W2 and all outings associated to these routes have to be
    updated.

    For example given the following associations: W1 -> W2, W2 -> W3,
    W3 -> R1
    Route R1 has the following `associated_waypoint_ids`: W3, W2, W1
    When association W1 -> W2 is deleted, all routes linked to W2 and all
    routes linked to the direct waypoint children of W2 (in this case W3)
    have to be updated.
    After the update, `associated_waypoint_ids` of R1 is: W3, W2
    """
    # CTE 1: waypoints whose waypoint->waypoint associations changed
    # since `last_update`.
    select_changed_waypoints = session. \
        query(AssociationLog.child_document_id.label('waypoint_id')). \
        filter(and_(
            AssociationLog.parent_document_type == WAYPOINT_TYPE,
            AssociationLog.child_document_type == WAYPOINT_TYPE
        )). \
        filter(AssociationLog.written_at >= last_update). \
        cte('changed_waypoints')
    # CTE 2: direct waypoint children of those changed waypoints.
    select_changed_waypoint_children = session. \
        query(Association.child_document_id.label('waypoint_id')). \
        select_from(select_changed_waypoints). \
        join(
            Association,
            and_(
                Association.parent_document_id ==
                select_changed_waypoints.c.waypoint_id,
                Association.child_document_type == WAYPOINT_TYPE
            )). \
        cte('changed_waypoint_children')
    # CTE 3: union of the changed waypoints and their direct children.
    select_all_changed_waypoints = union(
        select_changed_waypoints.select(),
        select_changed_waypoint_children.select()). \
        cte('all_changed_waypoints')
    # CTE 4: routes associated to any affected waypoint
    # (grouped to de-duplicate).
    select_changed_routes = session. \
        query(
            Association.child_document_id.label('route_id')
        ). \
        select_from(select_all_changed_waypoints). \
        join(
            Association,
            and_(
                Association.parent_document_id ==
                select_all_changed_waypoints.c.waypoint_id,
                Association.child_document_type == ROUTE_TYPE
            )). \
        group_by(Association.child_document_id). \
        cte('changed_routes')
    # CTE 5: outings associated to those routes (grouped to de-duplicate).
    select_changed_outings = session. \
        query(
            Association.child_document_id.label('outing_id')). \
        select_from(select_changed_routes). \
        join(
            Association,
            and_(
                Association.parent_document_id ==
                select_changed_routes.c.route_id,
                Association.child_document_type == OUTING_TYPE
            )). \
        group_by(Association.child_document_id). \
        cte('changed_outings')
    # CTE 6: merge both result sets into uniform (document_id, type) rows.
    select_changed_routes_and_outings = union(
        session.query(
            select_changed_routes.c.route_id.label('document_id'),
            literal(ROUTE_TYPE).label('type')
        ).select_from(select_changed_routes),
        session.query(
            select_changed_outings.c.outing_id.label('document_id'),
            literal(OUTING_TYPE).label('type')
        ).select_from(select_changed_outings)). \
        cte('changed_routes_and_outings')
    return session. \
        query(
            select_changed_routes_and_outings.c.document_id,
            select_changed_routes_and_outings.c.type). \
        select_from(select_changed_routes_and_outings). \
        all()
def literal(self, value: str) -> BindParameter:
    """Wrap ``value`` as a SQLAlchemy bind parameter.

    :param value: raw string value to bind.
    :return: a ``BindParameter`` wrapping ``value``.
    :raises NotImplementedError: when the backend is not SQLAlchemy-based.
    """
    # Annotation fixed: ``Union[BindParameter]`` is a one-member Union,
    # which is just ``BindParameter`` itself.
    if self._is_sqlalchemy:
        # Resolves to the module-level sqlalchemy ``literal`` at runtime;
        # this method only shadows the name within the class namespace.
        return literal(value)
    raise NotImplementedError()
def query(
    cls,
    select_query: Select,
    taxon_model_info_map: Dict[str, TaxonModelInfo],
    projection_taxons: SlugExprTaxonMap,
    data_source: str,
    order_by: Optional[List[TaxonDataOrder]],
    limit: Optional[int],
    offset: Optional[int],
    used_physical_data_sources: Set[str],
    dimension_templates: Optional[List[SqlFormulaTemplate]] = None,
) -> Dataframe:
    """
    Generates the final projected dataframe.

    :param select_query: Original query fetching all necessary fields
    :param taxon_model_info_map: Map of taxon slug expression to taxon model info
    :param projection_taxons: List of taxons meant to be projected by the final query
    :param data_source: Virtual data source for this subrequest
    :param order_by: List of clauses for order by
    :param limit: Limit for the query
    :param offset: Offset for the query
    :param used_physical_data_sources: Physical data sources recorded on the dataframe
    :param dimension_templates: List of dimension templates
    :return: Final dataframe including all requested taxons
    """
    group_by_columns = []
    select_columns = []
    df_columns: Dict[TaxonExpressionStr, DataframeColumn] = {}

    for taxon in projection_taxons.values():
        safe_identifier = taxon.slug_safe_sql_identifier
        base_column = column(safe_identifier)

        # Wrap the column in its aggregation function when the taxon
        # declares one that we know how to apply.
        agg_type = taxon.tel_metadata_aggregation_type
        agg_fn = cls._AGGREGATION_FUNCTIONS_MAP.get(agg_type) if agg_type else None
        selected = agg_fn(base_column) if agg_fn else base_column
        selected = selected.label(safe_identifier)

        # Work out the value quantity type for the dataframe column.
        quantity_type = ValueQuantityType.scalar
        if not taxon.calculation and taxon.slug_expr in taxon_model_info_map:
            quantity_type = taxon_model_info_map[taxon.slug_expr].quantity_type

        slug = TaxonExpressionStr(taxon.slug)
        df_columns[slug] = DataframeColumn(slug, taxon, quantity_type)

        # Always project the column in the final query.
        select_columns.append(selected)

        # Some aggregation types must also appear in GROUP BY.
        if agg_type in cls._GROUP_BY_AGGREGATION_TYPES:
            group_by_columns.append(selected)

    # Project (and group by) every dimension template column as well.
    for dim_template in dimension_templates or []:
        template_column = column(dim_template.label)
        select_columns.append(template_column)
        group_by_columns.append(template_column)

    # On purpose adding this value to emulate USING ON FALSE => PROD-8136
    select_columns.append(literal(data_source).label(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME))
    # using literal_column here because some database engines do not like grouping by constant
    group_by_columns.append(literal_column(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME))

    # Assemble the projection query around the original select.
    projected_query = Select(
        columns=sort_columns(select_columns),
        order_by=[nullslast(ORDER_BY_FUNCTIONS[item.type](item.taxon)) for item in (order_by or [])],
        group_by=sort_columns(group_by_columns),
    ).select_from(select_query)

    if limit is not None:
        projected_query = projected_query.limit(limit)
    if offset is not None:
        projected_query = projected_query.offset(offset)

    # Names of all models contributing to this dataframe.
    used_model_names = {
        info.model_name
        for info in taxon_model_info_map.values()
        if info.model_name is not None
    }

    return Dataframe(projected_query, df_columns, used_model_names, used_physical_data_sources)