def test_joined_columns():
    """Verify that JoinedSchema exposes the union of all joined tables'
    columns, both in flattened form (nested columns exploded) and in
    structured form (nested columns kept intact).
    """
    schema = JoinedSchema(complex_join_structure)
    columns = schema.get_columns()
    expected_columns = ColumnSet([
        ("t1.t1c1", UInt(64)),
        ("t1.t1c2", String()),
        ("t1.t1c3", Nested([("t11c4", UInt(64))])),
        ("t2.t2c1", UInt(64)),
        ("t2.t2c2", String()),
        ("t2.t2c3", Nested([("t21c4", UInt(64))])),
        ("t3.t3c1", UInt(64)),
        ("t3.t3c2", String()),
        ("t3.t3c3", Nested([("t31c4", UInt(64))])),
    ])

    # Checks equality between flattened columns. Nested columns are
    # exploded here.
    assert {c.flattened for c in columns} == {
        c.flattened for c in expected_columns
    }

    # Checks equality between the structured set of columns. Nested columns
    # are not exploded.
    assert {repr(c) for c in columns.columns} == {
        repr(c) for c in expected_columns.columns
    }
def __init__(
    self,
    storage_set_key: StorageSetKey,
    join_structure: JoinClause,
) -> None:
    """Initialize a joined storage over the provided join structure.

    The structure is kept on the instance and also used to derive the
    JoinedSchema handed to the parent constructor.
    """
    self.__structure = join_structure
    super().__init__(storage_set_key, JoinedSchema(join_structure))
def __init__(self) -> None:
    """Build the groups dataset: a LEFT join between the groupedmessage
    table and the events table on (project_id, group_id), with mandatory
    not-deleted filters on both sides.
    """
    self.__grouped_message = get_dataset("groupedmessage")
    groupedmessage_source = (
        self.__grouped_message.get_dataset_schemas()
        .get_read_schema()
        .get_data_source()
    )

    self.__events = get_dataset("events")
    events_source = (
        self.__events.get_dataset_schemas()
        .get_read_schema()
        .get_data_source()
    )

    # Left side of the join: the groupedmessage table.
    groups_node = TableJoinNode(
        table_name=groupedmessage_source.format_from(),
        columns=groupedmessage_source.get_columns(),
        mandatory_conditions=[
            # TODO: This will be replaced as soon as expressions won't be strings
            # thus we will be able to easily add an alias to a column in an
            # expression.
            (qualified_column("record_deleted", self.GROUPS_ALIAS), "=", 0)
        ],
        prewhere_candidates=[
            qualified_column(col, self.GROUPS_ALIAS)
            for col in groupedmessage_source.get_prewhere_candidates()
        ],
        alias=self.GROUPS_ALIAS,
    )

    # Right side of the join: the events table.
    events_node = TableJoinNode(
        table_name=events_source.format_from(),
        columns=events_source.get_columns(),
        mandatory_conditions=[
            (qualified_column("deleted", self.EVENTS_ALIAS), "=", 0)
        ],
        prewhere_candidates=[
            qualified_column(col, self.EVENTS_ALIAS)
            for col in events_source.get_prewhere_candidates()
        ],
        alias=self.EVENTS_ALIAS,
    )

    # Join keys: groups.project_id = events.project_id and
    # groups.id = events.group_id.
    join_structure = JoinClause(
        left_node=groups_node,
        right_node=events_node,
        mapping=[
            JoinCondition(
                left=JoinConditionExpression(
                    table_alias=self.GROUPS_ALIAS, column=groups_col
                ),
                right=JoinConditionExpression(
                    table_alias=self.EVENTS_ALIAS, column=events_col
                ),
            )
            for groups_col, events_col in (
                ("project_id", "project_id"),
                ("id", "group_id"),
            )
        ],
        join_type=JoinType.LEFT,
    )

    super().__init__(
        dataset_schemas=DatasetSchemas(
            read_schema=JoinedSchema(join_structure),
            write_schema=None,
        ),
        time_group_columns={"events.time": "events.timestamp"},
        time_parse_columns=[
            "events.timestamp",
            "events.received",
            "groups.last_seen",
            "groups.first_seen",
            "groups.active_at",
        ],
    )
def __init__(self) -> None:
    """Build the groups entity: a LEFT join between the groupedmessage
    storage and the events storage on (project_id, group_id), with
    mandatory not-deleted filters on both sides.
    """

    def not_deleted(alias: str, flag_column: str):
        # Rows flagged as deleted must always be filtered out on each side.
        return binary_condition(
            None,
            ConditionFunctions.EQ,
            Column(None, alias, flag_column),
            Literal(None, 0),
        )

    self.__grouped_message = get_entity(EntityKey.GROUPEDMESSAGES)
    groupedmessage_source = (
        get_storage(StorageKey.GROUPEDMESSAGES).get_schema().get_data_source()
    )

    self.__events = get_entity(EntityKey.EVENTS)
    events_source = (
        get_storage(StorageKey.EVENTS).get_schema().get_data_source()
    )

    # Left side of the join: the groupedmessage table.
    groups_node = TableJoinNode(
        table_name=groupedmessage_source.format_from(),
        columns=groupedmessage_source.get_columns(),
        mandatory_conditions=[
            not_deleted(self.GROUPS_ALIAS, "record_deleted"),
        ],
        prewhere_candidates=[
            qualified_column(col, self.GROUPS_ALIAS)
            for col in groupedmessage_source.get_prewhere_candidates()
        ],
        alias=self.GROUPS_ALIAS,
    )

    # Right side of the join: the events table.
    events_node = TableJoinNode(
        table_name=events_source.format_from(),
        columns=events_source.get_columns(),
        mandatory_conditions=[
            not_deleted(self.EVENTS_ALIAS, "deleted"),
        ],
        prewhere_candidates=[
            qualified_column(col, self.EVENTS_ALIAS)
            for col in events_source.get_prewhere_candidates()
        ],
        alias=self.EVENTS_ALIAS,
    )

    # Join keys: groups.project_id = events.project_id and
    # groups.id = events.group_id.
    join_structure = JoinClause(
        left_node=groups_node,
        right_node=events_node,
        mapping=[
            JoinCondition(
                left=JoinConditionExpression(
                    table_alias=self.GROUPS_ALIAS, column=groups_col
                ),
                right=JoinConditionExpression(
                    table_alias=self.EVENTS_ALIAS, column=events_col
                ),
            )
            for groups_col, events_col in (
                ("project_id", "project_id"),
                ("id", "group_id"),
            )
        ],
        join_type=JoinType.LEFT,
    )

    schema = JoinedSchema(join_structure)
    storage = JoinedStorage(StorageSetKey.EVENTS, join_structure)
    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        abstract_column_set=schema.get_columns(),
        writable_storage=None,
    )
def get_schemas(self) -> StorageSchemas:
    """Return the schemas for this storage: a read-only JoinedSchema
    derived from the join structure, with no writable schema."""
    read = JoinedSchema(self.__structure)
    return StorageSchemas(read_schema=read, write_schema=None)
def __init__(self) -> None:
    """Build the groups dataset: a LEFT join between the groupedmessage
    table and the events table on (project_id, group_id), with mandatory
    not-deleted filters on both sides.
    """
    self.__grouped_message = get_dataset('groupedmessage')
    groupedmessage_source = (
        self.__grouped_message.get_dataset_schemas()
        .get_read_schema()
        .get_data_source()
    )

    self.__events = get_dataset('events')
    events_source = (
        self.__events.get_dataset_schemas()
        .get_read_schema()
        .get_data_source()
    )

    # Left side of the join: the groupedmessage table.
    groups_node = TableJoinNode(
        table_name=groupedmessage_source.format_from(),
        columns=groupedmessage_source.get_columns(),
        mandatory_conditions=[
            # TODO: This will be replaced as soon as expressions won't be strings
            # thus we will be able to easily add an alias to a column in an
            # expression.
            (qualified_column('record_deleted', self.GROUPS_ALIAS), '=', 0)
        ],
        alias=self.GROUPS_ALIAS,
    )

    # Right side of the join: the events table.
    events_node = TableJoinNode(
        table_name=events_source.format_from(),
        columns=events_source.get_columns(),
        mandatory_conditions=[
            (qualified_column('deleted', self.EVENTS_ALIAS), '=', 0)
        ],
        alias=self.EVENTS_ALIAS,
    )

    # Join keys: groups.project_id = events.project_id and
    # groups.id = events.group_id.
    join_structure = JoinClause(
        left_node=groups_node,
        right_node=events_node,
        mapping=[
            JoinCondition(
                left=JoinConditionExpression(
                    table_alias=self.GROUPS_ALIAS, column=groups_col
                ),
                right=JoinConditionExpression(
                    table_alias=self.EVENTS_ALIAS, column=events_col
                ),
            )
            for groups_col, events_col in (
                ('project_id', 'project_id'),
                ('id', 'group_id'),
            )
        ],
        join_type=JoinType.LEFT,
    )

    super().__init__(
        dataset_schemas=DatasetSchemas(
            read_schema=JoinedSchema(join_structure),
            write_schema=None,
        ),
        time_group_columns={
            'events.time': 'events.timestamp',
        },
        time_parse_columns=['events.timestamp'],
    )