def __init__(self) -> None:
    """Assemble the read-only LEFT join of grouped messages with events.

    The grouped-message source is the left side and the events source is
    the right side. Rows are matched on ``project_id`` on both sides and
    on the left ``id`` against the right ``group_id``. Each side carries
    one mandatory condition requiring its deletion flag column to equal 0,
    and each side's prewhere candidates are qualified with that side's
    alias. The resulting schema is read-only (``write_schema=None``).
    """
    self.__grouped_message = get_dataset("groupedmessage")
    groups_read_schema = (
        self.__grouped_message.get_dataset_schemas().get_read_schema()
    )
    groupedmessage_source = groups_read_schema.get_data_source()

    self.__events = get_dataset("events")
    events_read_schema = self.__events.get_dataset_schemas().get_read_schema()
    events_source = events_read_schema.get_data_source()

    groups_alias = self.GROUPS_ALIAS
    events_alias = self.EVENTS_ALIAS

    # Left side of the join: grouped messages.
    groups_node = TableJoinNode(
        table_name=groupedmessage_source.format_from(),
        columns=groupedmessage_source.get_columns(),
        mandatory_conditions=[
            # TODO: This will be replaced as soon as expressions won't be
            # strings, thus we will be able to easily add an alias to a
            # column in an expression.
            (qualified_column("record_deleted", groups_alias), "=", 0),
        ],
        prewhere_candidates=[
            qualified_column(candidate, groups_alias)
            for candidate in groupedmessage_source.get_prewhere_candidates()
        ],
        alias=groups_alias,
    )

    # Right side of the join: events.
    events_node = TableJoinNode(
        table_name=events_source.format_from(),
        columns=events_source.get_columns(),
        mandatory_conditions=[
            (qualified_column("deleted", events_alias), "=", 0),
        ],
        prewhere_candidates=[
            qualified_column(candidate, events_alias)
            for candidate in events_source.get_prewhere_candidates()
        ],
        alias=events_alias,
    )

    # (left column, right column) pairs the two tables are joined on.
    join_keys = (("project_id", "project_id"), ("id", "group_id"))
    join_mapping = [
        JoinCondition(
            left=JoinConditionExpression(
                table_alias=groups_alias, column=left_column
            ),
            right=JoinConditionExpression(
                table_alias=events_alias, column=right_column
            ),
        )
        for left_column, right_column in join_keys
    ]

    join_structure = JoinClause(
        left_node=groups_node,
        right_node=events_node,
        mapping=join_mapping,
        join_type=JoinType.LEFT,
    )

    super().__init__(
        dataset_schemas=DatasetSchemas(
            read_schema=JoinedSchema(join_structure),
            write_schema=None,
        ),
        time_group_columns={"events.time": "events.timestamp"},
        time_parse_columns=[
            "events.timestamp",
            "events.received",
            "groups.last_seen",
            "groups.first_seen",
            "groups.active_at",
        ],
    )
def __init__(self) -> None:
    """Assemble the LEFT join of grouped messages with events as one storage.

    The grouped-message storage is the left side and the events storage is
    the right side. Rows are matched on ``project_id`` on both sides and
    on the left ``id`` against the right ``group_id``. Each side carries
    one mandatory equality condition requiring its deletion flag column to
    be 0, and each side's prewhere candidates are qualified with that
    side's alias. The join is exposed through a single ``JoinedStorage``
    and no writable storage is configured.
    """
    self.__grouped_message = get_entity(EntityKey.GROUPEDMESSAGES)
    groups_storage_schema = get_storage(StorageKey.GROUPEDMESSAGES).get_schema()
    groupedmessage_source = groups_storage_schema.get_data_source()

    self.__events = get_entity(EntityKey.EVENTS)
    events_storage_schema = get_storage(StorageKey.EVENTS).get_schema()
    events_source = events_storage_schema.get_data_source()

    groups_alias = self.GROUPS_ALIAS
    events_alias = self.EVENTS_ALIAS

    # Left side of the join: grouped messages.
    groups_node = TableJoinNode(
        table_name=groupedmessage_source.format_from(),
        columns=groupedmessage_source.get_columns(),
        mandatory_conditions=[
            binary_condition(
                None,
                ConditionFunctions.EQ,
                Column(None, groups_alias, "record_deleted"),
                Literal(None, 0),
            ),
        ],
        prewhere_candidates=[
            qualified_column(candidate, groups_alias)
            for candidate in groupedmessage_source.get_prewhere_candidates()
        ],
        alias=groups_alias,
    )

    # Right side of the join: events.
    events_node = TableJoinNode(
        table_name=events_source.format_from(),
        columns=events_source.get_columns(),
        mandatory_conditions=[
            binary_condition(
                None,
                ConditionFunctions.EQ,
                Column(None, events_alias, "deleted"),
                Literal(None, 0),
            ),
        ],
        prewhere_candidates=[
            qualified_column(candidate, events_alias)
            for candidate in events_source.get_prewhere_candidates()
        ],
        alias=events_alias,
    )

    # (left column, right column) pairs the two tables are joined on.
    join_keys = (("project_id", "project_id"), ("id", "group_id"))
    join_mapping = [
        JoinCondition(
            left=JoinConditionExpression(
                table_alias=groups_alias, column=left_column
            ),
            right=JoinConditionExpression(
                table_alias=events_alias, column=right_column
            ),
        )
        for left_column, right_column in join_keys
    ]

    join_structure = JoinClause(
        left_node=groups_node,
        right_node=events_node,
        mapping=join_mapping,
        join_type=JoinType.LEFT,
    )

    # The schema is only needed for its column set; the storage is what
    # gets registered and handed to the query plan builder.
    joined_schema = JoinedSchema(join_structure)
    joined_storage = JoinedStorage(StorageSetKey.EVENTS, join_structure)

    super().__init__(
        storages=[joined_storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(storage=joined_storage),
        abstract_column_set=joined_schema.get_columns(),
        writable_storage=None,
    )
("t3c1", UInt(64)), ("t3c2", String()), ("t3c3", Nested([("t31c4", UInt(64))])), ]), local_table_name="table3", dist_table_name="table3", order_by="", partition_by="", ).get_data_source() simple_join_structure = JoinClause( TableJoinNode(table1.format_from(), table1.get_columns(), [], [], "t1"), TableJoinNode(table2.format_from(), table2.get_columns(), [], [], "t2"), [ JoinCondition( left=JoinConditionExpression(table_alias="t1", column="t1c1"), right=JoinConditionExpression(table_alias="t2", column="t2c2"), ), JoinCondition( left=JoinConditionExpression(table_alias="t1", column="t1c3"), right=JoinConditionExpression(table_alias="t2", column="t2c4"), ), ], JoinType.INNER, ) complex_join_structure = JoinClause( JoinClause( TableJoinNode(table1.format_from(), table1.get_columns(), [], [], "t1"), TableJoinNode(table2.format_from(), table2.get_columns(), [], [], "t2"),
def __init__(self) -> None:
    """Assemble the read-only LEFT join of grouped messages with events.

    The grouped-message source is the left side and the events source is
    the right side. Rows are matched on ``project_id`` on both sides and
    on the left ``id`` against the right ``group_id``. Each side carries
    one mandatory condition requiring its deletion flag column to equal 0.
    The resulting schema is read-only (``write_schema=None``).
    """
    self.__grouped_message = get_dataset("groupedmessage")
    groups_read_schema = (
        self.__grouped_message.get_dataset_schemas().get_read_schema()
    )
    groupedmessage_source = groups_read_schema.get_data_source()

    self.__events = get_dataset("events")
    events_read_schema = self.__events.get_dataset_schemas().get_read_schema()
    events_source = events_read_schema.get_data_source()

    groups_alias = self.GROUPS_ALIAS
    events_alias = self.EVENTS_ALIAS

    # Left side of the join: grouped messages.
    groups_node = TableJoinNode(
        table_name=groupedmessage_source.format_from(),
        columns=groupedmessage_source.get_columns(),
        mandatory_conditions=[
            # TODO: This will be replaced as soon as expressions won't be
            # strings, thus we will be able to easily add an alias to a
            # column in an expression.
            (qualified_column("record_deleted", groups_alias), "=", 0),
        ],
        alias=groups_alias,
    )

    # Right side of the join: events.
    events_node = TableJoinNode(
        table_name=events_source.format_from(),
        columns=events_source.get_columns(),
        mandatory_conditions=[
            (qualified_column("deleted", events_alias), "=", 0),
        ],
        alias=events_alias,
    )

    # (left column, right column) pairs the two tables are joined on.
    join_keys = (("project_id", "project_id"), ("id", "group_id"))
    join_mapping = [
        JoinCondition(
            left=JoinConditionExpression(
                table_alias=groups_alias, column=left_column
            ),
            right=JoinConditionExpression(
                table_alias=events_alias, column=right_column
            ),
        )
        for left_column, right_column in join_keys
    ]

    join_structure = JoinClause(
        left_node=groups_node,
        right_node=events_node,
        mapping=join_mapping,
        join_type=JoinType.LEFT,
    )

    super().__init__(
        dataset_schemas=DatasetSchemas(
            read_schema=JoinedSchema(join_structure),
            write_schema=None,
        ),
        time_group_columns={"events.time": "events.timestamp"},
        time_parse_columns=["events.timestamp"],
    )