Esempio n. 1
0
def test_joined_columns():
    """Verify the columns exposed by a JoinedSchema over ``complex_join_structure``.

    The expectation lists, for each of the ``t1.``, ``t2.`` and ``t3.``
    prefixes, a ``UInt(64)`` column, a ``String`` column and a ``Nested``
    column wrapping a single ``UInt(64)`` field.
    """
    actual = JoinedSchema(complex_join_structure).get_columns()

    expected = ColumnSet(
        [
            ("t1.t1c1", UInt(64)),
            ("t1.t1c2", String()),
            ("t1.t1c3", Nested([("t11c4", UInt(64))])),
            ("t2.t2c1", UInt(64)),
            ("t2.t2c2", String()),
            ("t2.t2c3", Nested([("t21c4", UInt(64))])),
            ("t3.t3c1", UInt(64)),
            ("t3.t3c2", String()),
            ("t3.t3c3", Nested([("t31c4", UInt(64))])),
        ]
    )

    # Flattened comparison: iterating a column set yields the exploded
    # (flattened) form of nested columns. Order is irrelevant, hence sets.
    actual_flat = {col.flattened for col in actual}
    expected_flat = {col.flattened for col in expected}
    assert actual_flat == expected_flat

    # Structured comparison: `.columns` keeps nested columns unexploded, so
    # the reprs of the top-level column definitions are compared instead.
    actual_reprs = {repr(col) for col in actual.columns}
    expected_reprs = {repr(col) for col in expected.columns}
    assert actual_reprs == expected_reprs
Esempio n. 2
0
    def __init__(self) -> None:
        """Build the joined groups/events storage and initialise the parent.

        A LEFT join is declared with the grouped-messages data source on the
        left (aliased ``self.GROUPS_ALIAS``) and the events data source on
        the right (aliased ``self.EVENTS_ALIAS``). The two sides are matched
        on ``project_id`` and on groups ``id`` == events ``group_id``. Each
        side carries one mandatory condition: ``record_deleted = 0`` for
        groups and ``deleted = 0`` for events.
        """
        # Left side: grouped messages.
        self.__grouped_message = get_entity(EntityKey.GROUPEDMESSAGES)
        groups_storage = get_storage(StorageKey.GROUPEDMESSAGES)
        groupedmessage_source = groups_storage.get_schema().get_data_source()

        # Right side: events.
        self.__events = get_entity(EntityKey.EVENTS)
        events_storage = get_storage(StorageKey.EVENTS)
        events_source = events_storage.get_schema().get_data_source()

        # Ingredients of the groups node, evaluated in the same order the
        # TableJoinNode keyword arguments are listed below.
        groups_table = groupedmessage_source.format_from()
        groups_columns = groupedmessage_source.get_columns()
        groups_not_deleted = binary_condition(
            None,
            ConditionFunctions.EQ,
            Column(None, self.GROUPS_ALIAS, "record_deleted"),
            Literal(None, 0),
        )
        groups_prewhere = [
            qualified_column(candidate, self.GROUPS_ALIAS)
            for candidate in groupedmessage_source.get_prewhere_candidates()
        ]
        groups_node = TableJoinNode(
            table_name=groups_table,
            columns=groups_columns,
            mandatory_conditions=[groups_not_deleted],
            prewhere_candidates=groups_prewhere,
            alias=self.GROUPS_ALIAS,
        )

        # Ingredients of the events node, same layout as above.
        events_table = events_source.format_from()
        events_columns = events_source.get_columns()
        events_not_deleted = binary_condition(
            None,
            ConditionFunctions.EQ,
            Column(None, self.EVENTS_ALIAS, "deleted"),
            Literal(None, 0),
        )
        events_prewhere = [
            qualified_column(candidate, self.EVENTS_ALIAS)
            for candidate in events_source.get_prewhere_candidates()
        ]
        events_node = TableJoinNode(
            table_name=events_table,
            columns=events_columns,
            mandatory_conditions=[events_not_deleted],
            prewhere_candidates=events_prewhere,
            alias=self.EVENTS_ALIAS,
        )

        # Join keys: groups.project_id = events.project_id
        #            groups.id         = events.group_id
        same_project = JoinCondition(
            left=JoinConditionExpression(
                table_alias=self.GROUPS_ALIAS, column="project_id"
            ),
            right=JoinConditionExpression(
                table_alias=self.EVENTS_ALIAS, column="project_id"
            ),
        )
        same_group = JoinCondition(
            left=JoinConditionExpression(
                table_alias=self.GROUPS_ALIAS, column="id"
            ),
            right=JoinConditionExpression(
                table_alias=self.EVENTS_ALIAS, column="group_id"
            ),
        )

        join_structure = JoinClause(
            left_node=groups_node,
            right_node=events_node,
            mapping=[same_project, same_group],
            join_type=JoinType.LEFT,
        )

        # The schema is only used to derive the abstract column set; the
        # storage is what the query plan builder actually runs against.
        joined_schema = JoinedSchema(join_structure)
        joined_storage = JoinedStorage(StorageSetKey.EVENTS, join_structure)
        super().__init__(
            storages=[joined_storage],
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=joined_storage
            ),
            abstract_column_set=joined_schema.get_columns(),
            writable_storage=None,
        )
Esempio n. 3
0
    def __init__(self) -> None:
        """Build the joined groups/events storage and initialise the parent.

        A LEFT join is declared with the ``groupedmessages`` storage's read
        schema on the left (aliased ``self.GROUPS_ALIAS``) and the ``events``
        storage's read schema on the right (aliased ``self.EVENTS_ALIAS``).
        The two sides are matched on ``project_id`` and on groups ``id`` ==
        events ``group_id``. Each side carries one mandatory condition:
        ``record_deleted = 0`` for groups and ``deleted = 0`` for events.
        """
        # Left side: grouped messages.
        self.__grouped_message = get_dataset("groupedmessage")
        groups_schemas = get_storage("groupedmessages").get_schemas()
        groupedmessage_source = groups_schemas.get_read_schema().get_data_source()

        # Right side: events.
        self.__events = get_dataset("events")
        events_schemas = get_storage("events").get_schemas()
        events_source = events_schemas.get_read_schema().get_data_source()

        # Ingredients of the groups node, evaluated in the same order the
        # TableJoinNode keyword arguments are listed below.
        groups_table = groupedmessage_source.format_from()
        groups_columns = groupedmessage_source.get_columns()
        # TODO: Replace this tuple once expressions are no longer strings;
        # at that point an alias can be attached to a column inside an
        # expression directly.
        groups_not_deleted = (
            qualified_column("record_deleted", self.GROUPS_ALIAS),
            "=",
            0,
        )
        groups_prewhere = [
            qualified_column(candidate, self.GROUPS_ALIAS)
            for candidate in groupedmessage_source.get_prewhere_candidates()
        ]
        groups_node = TableJoinNode(
            table_name=groups_table,
            columns=groups_columns,
            mandatory_conditions=[groups_not_deleted],
            prewhere_candidates=groups_prewhere,
            alias=self.GROUPS_ALIAS,
        )

        # Ingredients of the events node, same layout as above.
        events_table = events_source.format_from()
        events_columns = events_source.get_columns()
        events_not_deleted = (
            qualified_column("deleted", self.EVENTS_ALIAS),
            "=",
            0,
        )
        events_prewhere = [
            qualified_column(candidate, self.EVENTS_ALIAS)
            for candidate in events_source.get_prewhere_candidates()
        ]
        events_node = TableJoinNode(
            table_name=events_table,
            columns=events_columns,
            mandatory_conditions=[events_not_deleted],
            prewhere_candidates=events_prewhere,
            alias=self.EVENTS_ALIAS,
        )

        # Join keys: groups.project_id = events.project_id
        #            groups.id         = events.group_id
        same_project = JoinCondition(
            left=JoinConditionExpression(
                table_alias=self.GROUPS_ALIAS, column="project_id"
            ),
            right=JoinConditionExpression(
                table_alias=self.EVENTS_ALIAS, column="project_id"
            ),
        )
        same_group = JoinCondition(
            left=JoinConditionExpression(
                table_alias=self.GROUPS_ALIAS, column="id"
            ),
            right=JoinConditionExpression(
                table_alias=self.EVENTS_ALIAS, column="group_id"
            ),
        )

        join_structure = JoinClause(
            left_node=groups_node,
            right_node=events_node,
            mapping=[same_project, same_group],
            join_type=JoinType.LEFT,
        )

        # The schema is only used to derive the abstract column set; the
        # storage is what the query plan builder actually runs against.
        joined_schema = JoinedSchema(join_structure)
        joined_storage = JoinedStorage(join_structure)
        self.__time_group_columns = {"events.time": "events.timestamp"}

        # Columns handed to the parent as time_parse_columns.
        parsed_time_columns = [
            "events.timestamp",
            "events.received",
            "groups.last_seen",
            "groups.first_seen",
            "groups.active_at",
        ]
        super().__init__(
            storages=[joined_storage],
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=joined_storage
            ),
            abstract_column_set=joined_schema.get_columns(),
            writable_storage=None,
            time_group_columns=self.__time_group_columns,
            time_parse_columns=parsed_time_columns,
        )