def track_bad_query(
    query: Query,
    selected_entity: EntityKey,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
) -> None:
    """
    Emit metrics describing whether ``query`` fits the entity chosen for it.

    The columns and subscripts referenced by the query are checked against
    the two column sets. Then:

    * ``query.impossible`` is incremented when the query references
      events-only columns while ``selected_entity`` is ``TRANSACTIONS``, or
      references transactions-only columns while ``selected_entity`` is
      ``EVENTS`` or ``EVENTS_AND_TRANSACTIONS``.
    * ``query.impossible-merge-table`` is incremented when
      ``selected_entity`` is ``EVENTS_AND_TRANSACTIONS`` and any exclusive
      column is referenced; otherwise ``query.success`` is incremented.

    :param query: Query whose referenced columns and subscripts are inspected.
    :param selected_entity: Entity that was picked for the query.
    :param events_only_columns: Columns looked up to flag events-only usage.
    :param transactions_only_columns: Columns looked up to flag
        transactions-only usage.
    """
    events_referenced = set()
    transactions_referenced = set()

    # Plain columns: the events-only lookup takes precedence, the
    # transactions-only lookup is only tried when the first one misses.
    for column in query.get_all_ast_referenced_columns():
        name = column.column_name
        if events_only_columns.get(name):
            events_referenced.add(name)
        elif transactions_only_columns.get(name):
            transactions_referenced.add(name)

    # Subscripts: both lookups are always performed on the key column name.
    for subscript in query.get_all_ast_referenced_subscripts():
        key_column = subscript_key_column_name(subscript)
        if events_only_columns.get(key_column):
            events_referenced.add(key_column)
        if transactions_only_columns.get(key_column):
            transactions_referenced.add(key_column)

    event_mismatch = bool(events_referenced) and selected_entity == TRANSACTIONS
    transaction_mismatch = bool(transactions_referenced) and selected_entity in (
        EVENTS,
        EVENTS_AND_TRANSACTIONS,
    )

    if event_mismatch or transaction_mismatch:
        offending = events_referenced if event_mismatch else transactions_referenced
        metrics.increment(
            "query.impossible",
            tags={
                "selected_table": (
                    str(selected_entity.value)
                    if isinstance(selected_entity, EntityKey)
                    else selected_entity
                ),
                "missing_columns": ",".join(sorted(offending)),
            },
        )

    # NOTE(review): this check is independent of the mismatch check above, so
    # "query.success" is also emitted after "query.impossible" whenever the
    # selected entity is not the merge entity - confirm that is intended.
    uses_exclusive_columns = bool(events_referenced or transactions_referenced)
    if not (selected_entity == EVENTS_AND_TRANSACTIONS and uses_exclusive_columns):
        metrics.increment("query.success")
        return

    # Not possible in future with merge table
    metrics.increment(
        "query.impossible-merge-table",
        tags={
            "missing_events_columns": ",".join(sorted(events_referenced)),
            "missing_transactions_columns": ",".join(
                sorted(transactions_referenced)
            ),
        },
    )


def match_query_to_entity(
    query: Query,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
) -> EntityKey:
    """
    Choose the entity a query should be run against.

    The decision is made in two stages:

    1. The first-level AND conditions of the query are matched against
       ``EVENT_CONDITION``. A ``!=`` comparison with ``"transaction"``
       selects ``EVENTS`` immediately. Otherwise the values compared with
       ``=`` are collected: exactly ``{"transaction"}`` selects
       ``TRANSACTIONS``, and a non-empty set without ``"transaction"``
       selects ``EVENTS``.
    2. If the conditions are not conclusive, the referenced columns,
       subscripts and expressions are inspected. Usage exclusive to one side
       selects that entity; usage of both sides, or of neither, selects
       ``EVENTS_AND_TRANSACTIONS``.

    :param query: Query to inspect.
    :param events_only_columns: Columns looked up to flag events-only usage.
    :param transactions_only_columns: Columns looked up to flag
        transactions-only usage.
    :return: ``EVENTS``, ``TRANSACTIONS`` or ``EVENTS_AND_TRANSACTIONS``.
    """
    # First check for a top level condition on the event type
    condition = query.get_condition_from_ast()
    event_types = set()
    if condition:
        for cond in get_first_level_and_conditions(condition):
            result = EVENT_CONDITION.match(cond)
            if not result:
                continue

            event_type_param = result.expression("event_type")
            if isinstance(event_type_param, Column):
                event_type = event_type_param.column_name
            elif isinstance(event_type_param, Literal):
                event_type = str(event_type_param.value)
            else:
                # Neither a column nor a literal: there is no event type to
                # read. Skip the condition instead of hitting an unbound
                # local or reusing the value from a previous iteration.
                continue

            function = result.string("function")
            if function == ConditionFunctions.EQ:
                event_types.add(event_type)
            elif function == ConditionFunctions.NEQ and event_type == "transaction":
                return EVENTS

    if len(event_types) == 1 and "transaction" in event_types:
        return TRANSACTIONS

    if event_types and "transaction" not in event_types:
        return EVENTS

    # If we cannot clearly pick an entity from the top level conditions, then
    # inspect the columns requested to infer a selection.
    has_event_columns = False
    has_transaction_columns = False
    for col in query.get_all_ast_referenced_columns():
        if events_only_columns.get(col.column_name):
            has_event_columns = True
        elif transactions_only_columns.get(col.column_name):
            has_transaction_columns = True

    for subscript in query.get_all_ast_referenced_subscripts():
        # Subscriptable references will not be properly recognized above
        # through get_all_ast_referenced_columns since the columns that
        # method will find will look like `tags` or `measurements`, while
        # the column sets contain `tags.key` and `tags.value`.
        schema_col_name = subscript_key_column_name(subscript)
        if events_only_columns.get(schema_col_name):
            has_event_columns = True
        if transactions_only_columns.get(schema_col_name):
            has_transaction_columns = True

    # Check for isHandled/notHandled
    if not has_event_columns:
        has_event_columns = any(
            EVENT_FUNCTIONS.match(expr) for expr in query.get_all_expressions()
        )

    # Check for apdex or failure rate
    if not has_transaction_columns:
        has_transaction_columns = any(
            TRANSACTION_FUNCTIONS.match(expr)
            for expr in query.get_all_expressions()
        )

    if has_event_columns and has_transaction_columns:
        # Impossible query, use the merge table
        return EVENTS_AND_TRANSACTIONS
    if has_event_columns:
        return EVENTS
    if has_transaction_columns:
        return TRANSACTIONS
    # Nothing exclusive to either side was referenced.
    return EVENTS_AND_TRANSACTIONS


def detect_table(
    query: Query,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
    track_bad_queries: bool,
) -> EntityKey:
    """
    Guess whether a query is better served by the "events" storage, the
    "transactions" storage, or the future merged storage.

    The merged storage resolves to the events storage until errors and
    transactions are split into separate physical tables, so the result is
    ``EntityKey.TRANSACTIONS`` only when the match is ``TRANSACTIONS`` and
    ``EntityKey.EVENTS`` in every other case.

    :param query: Query to inspect.
    :param events_only_columns: Columns looked up to flag events-only usage.
    :param transactions_only_columns: Columns looked up to flag
        transactions-only usage.
    :param track_bad_queries: When true, metrics (and a warning log for
        impossible queries) are emitted describing how well the query fits
        the selected table.
    :return: ``EntityKey.EVENTS`` or ``EntityKey.TRANSACTIONS``.
    """
    selected_table = match_query_to_table(
        query, events_only_columns, transactions_only_columns
    )

    if track_bad_queries:
        events_referenced = set()
        transactions_referenced = set()

        # Plain columns: the transactions-only lookup is only tried when the
        # events-only lookup misses.
        for column in query.get_all_ast_referenced_columns():
            name = column.column_name
            if events_only_columns.get(name):
                events_referenced.add(name)
            elif transactions_only_columns.get(name):
                transactions_referenced.add(name)

        # Subscripts: both lookups are always performed.
        for subscript in query.get_all_ast_referenced_subscripts():
            key_column = subscript_key_column_name(subscript)
            if events_only_columns.get(key_column):
                events_referenced.add(key_column)
            if transactions_only_columns.get(key_column):
                transactions_referenced.add(key_column)

        event_mismatch = bool(events_referenced) and selected_table == TRANSACTIONS
        transaction_mismatch = bool(transactions_referenced) and selected_table in (
            EVENTS,
            EVENTS_AND_TRANSACTIONS,
        )

        if event_mismatch or transaction_mismatch:
            offending = (
                events_referenced if event_mismatch else transactions_referenced
            )
            if isinstance(selected_table, EntityKey):
                table_tag = str(selected_table.value)
            else:
                table_tag = selected_table
            metrics.increment(
                "query.impossible",
                tags={
                    "selected_table": table_tag,
                    "missing_columns": ",".join(sorted(offending)),
                },
            )
            # NOTE(review): exc_info=True is passed although no exception
            # handler is visible in this function - confirm it is intended.
            logger.warning("Discover generated impossible query", exc_info=True)

        if selected_table == EVENTS_AND_TRANSACTIONS and (
            events_referenced or transactions_referenced
        ):
            # Not possible in future with merge table
            missing_events_columns = ",".join(sorted(events_referenced))
            missing_transactions_columns = ",".join(sorted(transactions_referenced))
            metrics.increment(
                "query.impossible-merge-table",
                tags={
                    "missing_events_columns": missing_events_columns,
                    "missing_transactions_columns": missing_transactions_columns,
                },
            )
        else:
            metrics.increment("query.success")

    # Default for events and transactions is events
    if selected_table != TRANSACTIONS:
        return EntityKey.EVENTS
    return EntityKey.TRANSACTIONS