Example #1
        def fn_with_materialized(self, *args, **kwargs):
            # Don't run these tests under non-clickhouse classes even if decorated in base classes
            if not getattr(self, "RUN_MATERIALIZED_COLUMN_TESTS", False):
                return

            for prop in event_properties:
                materialize("events", prop)
            for prop in person_properties:
                materialize("person", prop)

            try:
                with self.capture_select_queries() as sqls:
                    fn(self, *args, **kwargs)
            finally:
                for prop in event_properties:
                    column_name = get_materialized_columns("events")[prop]
                    sync_execute(
                        f"ALTER TABLE events DROP COLUMN {column_name}")
                for prop in person_properties:
                    column_name = get_materialized_columns("person")[prop]
                    sync_execute(
                        f"ALTER TABLE person DROP COLUMN {column_name}")

            if verify_no_jsonextract:
                for sql in sqls:
                    self.assertNotIn("JSONExtract(properties", sql)
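The wrapper above is the inner function returned by a materialized-column test decorator. A minimal sketch of how a test class might opt in (the decorator name, its keyword arguments, and the base-class names are assumptions; only RUN_MATERIALIZED_COLUMN_TESTS comes from the snippet):

class TestTrendsMaterialized(ClickhouseTestMixin, APIBaseTest):
    # Opt-in flag checked by fn_with_materialized above.
    RUN_MATERIALIZED_COLUMN_TESTS = True

    # Hypothetical decorator producing fn_with_materialized; it would close over
    # event_properties, person_properties and verify_no_jsonextract.
    @test_with_materialized_columns(event_properties=["$browser"], person_properties=["email"])
    def test_trends_by_browser(self):
        ...  # run queries; the wrapper drops the materialized columns afterwards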
Example #2
    def precheck(self):
        events_failed_table_exists = sync_execute(
            f"EXISTS {FAILED_EVENTS_TABLE_NAME}")[0][0]
        if events_failed_table_exists:
            return (
                False,
                f"{FAILED_EVENTS_TABLE_NAME} already exists. We use this table as a backup if the migration fails. You can delete or rename it and restart the migration.",
            )

        events_table = "sharded_events" if CLICKHOUSE_REPLICATION else "events"
        result = sync_execute(f"""
        SELECT (free_space.size / greatest(event_table_size.size, 1)) FROM
            (SELECT 1 as jc, 'event_table_size', sum(bytes) as size FROM system.parts WHERE table = '{events_table}' AND database='{CLICKHOUSE_DATABASE}') event_table_size
        JOIN
            (SELECT 1 as jc, 'free_disk_space', free_space as size FROM system.disks WHERE name = 'default') free_space
        ON event_table_size.jc=free_space.jc
        """)
        event_size_to_free_space_ratio = result[0][0]

        # Require 1.5x the events table in free space to be available
        if event_size_to_free_space_ratio > 1.5:
            return (True, None)
        else:
            result = sync_execute(f"""
            SELECT formatReadableSize(free_space.size - (free_space.free_space - (1.5 * event_table_size.size ))) as required FROM
                (SELECT 1 as jc, 'event_table_size', sum(bytes) as size FROM system.parts WHERE table = '{events_table}' AND database='{CLICKHOUSE_DATABASE}') event_table_size
            JOIN
                (SELECT 1 as jc, 'free_disk_space', free_space, total_space as size FROM system.disks WHERE name = 'default') free_space
            ON event_table_size.jc=free_space.jc
            """)
            required_space = result[0][0]
            return (
                False,
                f"Upgrade your ClickHouse storage to at least {required_space}."
            )
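A worked example of the 1.5x free-space rule enforced by precheck (the byte counts are illustrative, not taken from the source):

# precheck passes only when free disk space is more than 1.5x the events table size.
event_table_bytes = 100 * 2**30                    # events table occupies 100 GiB
free_bytes = 120 * 2**30                           # 'default' disk has 120 GiB free
ratio = free_bytes / max(event_table_bytes, 1)     # 1.2
assert ratio <= 1.5                                # not enough headroom: precheck returns False
shortfall = 1.5 * event_table_bytes - free_bytes   # ~30 GiB of additional space needed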
Example #3
def insert_stickiness_people_into_cohort(cohort: Cohort, target_entity: Entity,
                                         filter: StickinessFilter) -> None:
    content_sql, params = ClickhouseStickinessActors(
        entity=target_entity, filter=filter, team=cohort.team).actor_query()

    try:
        sync_execute(
            INSERT_COHORT_ALL_PEOPLE_SQL.format(
                content_sql=content_sql,
                latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""),
                cohort_table=PERSON_STATIC_COHORT_TABLE,
                GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                    cohort.team_id),
            ),
            {
                "cohort_id": cohort.pk,
                "_timestamp": datetime.now(),
                **params
            },
        )
        cohort.is_calculating = False
        cohort.last_calculation = timezone.now()
        cohort.errors_calculating = 0
        cohort.save()
    except Exception as err:
        if settings.DEBUG:
            raise err
        cohort.is_calculating = False
        cohort.errors_calculating = F("errors_calculating") + 1
        cohort.save()
        capture_exception(err)
Example #4
    def update(self, instance: InstanceSetting, validated_data: Dict[str, Any]) -> InstanceSetting:
        if instance.key not in SETTINGS_ALLOWING_API_OVERRIDE:
            raise serializers.ValidationError("This setting cannot be updated from the API.", code="no_api_override")

        if validated_data["value"] is None:
            raise serializers.ValidationError({"value": "This field is required."}, code="required")

        target_type = settings.CONFIG[instance.key][2]
        if target_type == "bool" and isinstance(validated_data["value"], bool):
            new_value_parsed = validated_data["value"]
        else:
            new_value_parsed = cast_str_to_desired_type(validated_data["value"], target_type)

        if instance.key == "RECORDINGS_TTL_WEEKS":

            if MULTI_TENANCY:
                # On cloud the TTL is set on the session_recording_events_sharded table,
                # so this command should never be run
                raise serializers.ValidationError("This setting cannot be updated on MULTI_TENANCY.")

            # TODO: Move to top-level imports once CH is moved out of `ee`
            from ee.clickhouse.sql.session_recording_events import UPDATE_RECORDINGS_TABLE_TTL_SQL
            from posthog.client import sync_execute

            sync_execute(UPDATE_RECORDINGS_TABLE_TTL_SQL(), {"weeks": new_value_parsed})

        setattr(config, instance.key, new_value_parsed)
        instance.value = new_value_parsed

        if instance.key.startswith("EMAIL_") and "request" in self.context:
            from posthog.tasks.email import send_canary_email

            send_canary_email.apply_async(kwargs={"user_email": self.context["request"].user.email})

        return instance
Example #5
    def test_split_person_clickhouse(self):
        person = _create_person(
            team=self.team, distinct_ids=["1", "2", "3"], properties={"$browser": "whatever", "$os": "Mac OS X"}
        )

        response = self.client.post("/api/person/%s/split/" % person.pk,).json()
        self.assertTrue(response["success"])

        people = Person.objects.all().order_by("id")
        clickhouse_people = sync_execute(
            "SELECT id FROM person FINAL WHERE team_id = %(team_id)s", {"team_id": self.team.pk}
        )
        self.assertCountEqual(clickhouse_people, [(person.uuid,) for person in people])

        distinct_id_rows = PersonDistinctId.objects.all().order_by("person_id")
        pdis = sync_execute(
            "SELECT person_id, distinct_id FROM person_distinct_id FINAL WHERE team_id = %(team_id)s",
            {"team_id": self.team.pk},
        )
        self.assertCountEqual(pdis, [(pdi.person.uuid, pdi.distinct_id) for pdi in distinct_id_rows])

        pdis2 = sync_execute(
            "SELECT person_id, distinct_id FROM person_distinct_id2 FINAL WHERE team_id = %(team_id)s",
            {"team_id": self.team.pk},
        )
        self.assertCountEqual(pdis2, [(pdi.person.uuid, pdi.distinct_id) for pdi in distinct_id_rows])
Example #6
def insert_actors_into_cohort_by_query(cohort: Cohort, query: str,
                                       params: Dict[str, Any]):
    try:
        sync_execute(
            INSERT_COHORT_ALL_PEOPLE_THROUGH_PERSON_ID.format(
                cohort_table=PERSON_STATIC_COHORT_TABLE, query=query),
            {
                "cohort_id": cohort.pk,
                "_timestamp": datetime.now(),
                "team_id": cohort.team.pk,
                **params
            },
        )

        cohort.is_calculating = False
        cohort.last_calculation = timezone.now()
        cohort.errors_calculating = 0
        cohort.save()
    except Exception as err:

        if settings.DEBUG:
            raise err
        cohort.is_calculating = False
        cohort.errors_calculating = F("errors_calculating") + 1
        cohort.save()
        capture_exception(err)
Example #7
    def test_client_strips_comments_from_request(self):
        """
        To ensure we can easily copy queries from `system.query_log` in e.g.
        Metabase, we strip comments from the query we send. Metabase doesn't
        display multilined output.

        See https://github.com/metabase/metabase/issues/14253

        Note I'm not really testing much complexity, I trust that those will
        come out as failures in other tests.
        """
        # First add in the request information that should be added to the sql.
        # We check this to make sure it is not removed by the comment stripping
        with self.capture_select_queries() as sqls:
            client._request_information = {"kind": "request", "id": "1"}
            sync_execute(query="""
                    -- this request returns 1
                    SELECT 1
                """)
            self.assertEqual(len(sqls), 1)
            first_query = sqls[0]
            self.assertIn(f"SELECT 1", first_query)
            self.assertNotIn("this request returns", first_query)

            # Make sure it still includes the "annotation" comment that includes
            # request routing information for debugging purposes
            self.assertIn("/* request:1 */", first_query)
Example #8
def get_clickhouse_query_stats(uuid):
    client.sync_execute("SYSTEM FLUSH LOGS")
    rows = client.sync_execute(
        f"""
        SELECT
            query_duration_ms,
            read_rows,
            read_bytes,
            memory_usage
        FROM system.query_log
        WHERE
            query NOT LIKE '%%query_log%%'
            AND query LIKE %(matcher)s
            AND type = 'QueryFinish'
        """,
        {"matcher": f"%benchmark:{uuid}%"},
    )

    return {
        "query_count": len(rows),
        "ch_query_time": int(sum(get_column(rows, 0))),
        "read_rows": sum(get_column(rows, 1)),
        "read_bytes": sum(get_column(rows, 2)),
        "memory_usage": sum(get_column(rows, 3)),
    }
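How a caller might tag its queries so they can be picked out of system.query_log afterwards; the comment-based "benchmark:<uuid>" tag is an assumption inferred from the matcher above:

import uuid

benchmark_uuid = uuid.uuid4()
# Any query carrying the marker will match the LIKE %(matcher)s filter above.
client.sync_execute(f"SELECT count() FROM events /* benchmark:{benchmark_uuid} */")
stats = get_clickhouse_query_stats(benchmark_uuid)
print(stats["query_count"], stats["ch_query_time"], stats["read_rows"])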
Example #9
    def test_status_report_duplicate_distinct_ids(self) -> None:
        create_person_distinct_id(self.team.id, "duplicate_id1", str(UUIDT()))
        create_person_distinct_id(self.team.id, "duplicate_id1", str(UUIDT()))
        create_person_distinct_id(self.team.id, "duplicate_id2", str(UUIDT()))
        create_person_distinct_id(self.team.id, "duplicate_id2", str(UUIDT()))
        create_person_distinct_id(self.team.id, "duplicate_id2", str(UUIDT()))

        for index in range(0, 2):
            sync_execute(
                "INSERT INTO person_distinct_id SELECT %(distinct_id)s, %(person_id)s, %(team_id)s, 1, %(timestamp)s, 0 VALUES",
                {
                    "distinct_id": "duplicate_id_old",
                    "person_id": str(UUIDT()),
                    "team_id": self.team.id,
                    "timestamp": "2020-01-01 12:01:0%s" % index,
                },
            )

        report = status_report(
            dry_run=True).get("teams")[self.team.id]  # type: ignore

        duplicate_ids_report = report["duplicate_distinct_ids"]

        expected_result = {
            "prev_total_ids_with_duplicates": 1,
            "prev_total_extra_distinct_id_rows": 1,
            "new_total_ids_with_duplicates": 2,
            "new_total_extra_distinct_id_rows": 4,
        }

        self.assertEqual(duplicate_ids_report, expected_result)
Example #10
def reset_clickhouse_tables():
    # Reset clickhouse tables to default before running test
    # Mostly so that test runs locally work correctly
    from ee.clickhouse.sql.cohort import TRUNCATE_COHORTPEOPLE_TABLE_SQL
    from ee.clickhouse.sql.dead_letter_queue import TRUNCATE_DEAD_LETTER_QUEUE_TABLE_MV_SQL, TRUNCATE_DEAD_LETTER_QUEUE_TABLE_SQL
    from ee.clickhouse.sql.events import TRUNCATE_EVENTS_TABLE_SQL
    from ee.clickhouse.sql.groups import TRUNCATE_GROUPS_TABLE_SQL
    from ee.clickhouse.sql.person import (
        TRUNCATE_PERSON_DISTINCT_ID2_TABLE_SQL,
        TRUNCATE_PERSON_DISTINCT_ID_TABLE_SQL,
        TRUNCATE_PERSON_STATIC_COHORT_TABLE_SQL,
        TRUNCATE_PERSON_TABLE_SQL,
    )
    from ee.clickhouse.sql.plugin_log_entries import TRUNCATE_PLUGIN_LOG_ENTRIES_TABLE_SQL
    from ee.clickhouse.sql.session_recording_events import TRUNCATE_SESSION_RECORDING_EVENTS_TABLE_SQL

    # REMEMBER TO ADD ANY NEW CLICKHOUSE TABLES TO THIS ARRAY!
    TABLES_TO_CREATE_DROP = [
        TRUNCATE_EVENTS_TABLE_SQL(),
        TRUNCATE_PERSON_TABLE_SQL,
        TRUNCATE_PERSON_DISTINCT_ID_TABLE_SQL,
        TRUNCATE_PERSON_DISTINCT_ID2_TABLE_SQL,
        TRUNCATE_PERSON_STATIC_COHORT_TABLE_SQL,
        TRUNCATE_SESSION_RECORDING_EVENTS_TABLE_SQL(),
        TRUNCATE_PLUGIN_LOG_ENTRIES_TABLE_SQL,
        TRUNCATE_COHORTPEOPLE_TABLE_SQL,
        TRUNCATE_DEAD_LETTER_QUEUE_TABLE_SQL,
        TRUNCATE_DEAD_LETTER_QUEUE_TABLE_MV_SQL,
        TRUNCATE_GROUPS_TABLE_SQL,
    ]

    for item in TABLES_TO_CREATE_DROP:
        sync_execute(item)
Example #11
def get_property_values_for_key(key: str,
                                team: Team,
                                value: Optional[str] = None):
    property_field, _ = get_property_string_expr("events", key, "%(key)s",
                                                 "properties")
    parsed_date_from = "AND timestamp >= '{}'".format(
        relative_date_parse("-7d").strftime("%Y-%m-%d 00:00:00"))
    parsed_date_to = "AND timestamp <= '{}'".format(
        timezone.now().strftime("%Y-%m-%d 23:59:59"))

    if value:
        return sync_execute(
            SELECT_PROP_VALUES_SQL_WITH_FILTER.format(
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                property_field=property_field),
            {
                "team_id": team.pk,
                "key": key,
                "value": "%{}%".format(value)
            },
        )
    return sync_execute(
        SELECT_PROP_VALUES_SQL.format(parsed_date_from=parsed_date_from,
                                      parsed_date_to=parsed_date_to,
                                      property_field=property_field),
        {
            "team_id": team.pk,
            "key": key
        },
    )
Example #12
    def progress(self, _):
        result = sync_execute(f"SELECT COUNT(1) FROM {TEMPORARY_TABLE_NAME}")
        result2 = sync_execute(
            f"SELECT COUNT(1) FROM {PERSONS_DISTINCT_ID_TABLE}")
        total_events_to_move = result2[0][0]
        total_events_moved = result[0][0]

        progress = 100 * total_events_moved / total_events_to_move
        return progress
Example #13
    def test_is_required(self):
        from posthog.client import sync_execute

        self.assertTrue(self.migration.is_required())

        sync_execute(
            "ALTER TABLE person_distinct_id COMMENT COLUMN distinct_id 'skip_0003_fill_person_distinct_id2'"
        )
        self.assertFalse(self.migration.is_required())
Example #14
    def test_events_columns_in_inconsistent_state(self):
        materialize("events", "$session_id")
        materialize("events", "$window_id")

        sync_execute(
            "ALTER TABLE events RENAME COLUMN mat_$session_id TO $session_id")

        materialize_session_and_window_id(CLICKHOUSE_DATABASE)
        self.assert_desired_state()
Example #15
    def create_missing_tables(self, out_of_sync_hosts: Dict[HostName,
                                                            Set[TableName]],
                              create_table_queries: Dict[TableName, Query]):
        missing_tables = set(table for tables in out_of_sync_hosts.values()
                             for table in tables)

        logger.info("Creating missing tables", missing_tables=missing_tables)
        for table in missing_tables:
            query = create_table_queries[table]
            sync_execute(self.run_on_cluster(query))
Example #16
def insert_static_cohort(person_uuids: List[Optional[uuid.UUID]],
                         cohort_id: int, team: Team):
    persons = ({
        "id": str(uuid.uuid4()),
        "person_id": str(person_uuid),
        "cohort_id": cohort_id,
        "team_id": team.pk,
        "_timestamp": datetime.now(),
    } for person_uuid in person_uuids)
    sync_execute(INSERT_PERSON_STATIC_COHORT, persons)
Example #17
    def test_direct_table_insert(self):
        inserted_dlq_event = get_dlq_event()
        sync_execute(
            INSERT_DEAD_LETTER_QUEUE_EVENT_SQL,
            inserted_dlq_event,
        )
        query_result = sync_execute(f"SELECT * FROM {DEAD_LETTER_QUEUE_TABLE}")
        events_returned = convert_query_result_to_dlq_event_dicts(query_result)
        # TRICKY: because it's hard to truncate the DLQ table, we just check that the event
        # is in the table alongside events from other tests; each generated event is unique,
        # so this works.
        self.assertIn(inserted_dlq_event, events_returned)
Example #18
    def test_is_required(self):
        from posthog.client import sync_execute

        migration = get_async_migration_definition(MIGRATION_NAME)

        self.assertTrue(migration.is_required())

        settings.CLICKHOUSE_REPLICATION = True
        sync_execute("DROP TABLE events SYNC")
        sync_execute(DISTRIBUTED_EVENTS_TABLE_SQL())
        self.assertFalse(migration.is_required())
Example #19
def delete_teams_data(team_ids: List[int]):
    logger.info(
        "Deleting teams data from clickhouse using background mutations.",
        team_ids=team_ids,
        tables=TABLES_TO_DELETE_FROM(),
    )
    for table in TABLES_TO_DELETE_FROM():
        sync_execute(
            f"ALTER TABLE {table} ON CLUSTER '{CLICKHOUSE_CLUSTER}' DELETE WHERE team_id IN %(team_ids)s",
            {"team_ids": team_ids},
        )
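ALTER TABLE ... DELETE only schedules background mutations; a hedged follow-up sketch for checking whether they have finished (system.mutations is a standard ClickHouse system table, the polling itself is an assumption):

# Rows with is_done = 0 are deletions that are still being applied.
pending = sync_execute(
    "SELECT table, command FROM system.mutations WHERE is_done = 0"
)
if pending:
    logger.info("Team data deletions still running", pending=pending)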
Example #20
    def create_distinct_id(self, **kwargs):
        from posthog.client import sync_execute

        sync_execute(
            "INSERT INTO person_distinct_id SELECT %(distinct_id)s, %(person_id)s, %(team_id)s, %(sign)s, %(timestamp)s, 0 VALUES",
            {
                **kwargs,
                "timestamp":
                datetime(2020, 1, 2) + timedelta(days=self.timestamp),
            },
        )
        self.timestamp += 1
Example #21
    def produce(self, sql: str, topic: str, data: Dict[str, Any], sync: bool = True):
        if self.send_to_kafka:
            self.producer.produce(topic=topic, data=data)
        else:
            if sync:
                sync_execute(sql, data)
            else:
                async_execute(sql, data)
Example #22
    def test_person_cohort_properties(self):
        person1_distinct_id = "person1"
        person1 = Person.objects.create(
            team=self.team, distinct_ids=[person1_distinct_id], properties={"$some_prop": "something"}
        )

        cohort1 = Cohort.objects.create(
            team=self.team,
            groups=[{"properties": [{"type": "person", "key": "$some_prop", "value": "something"}]}],
            name="cohort1",
        )

        person2_distinct_id = "person2"
        person2 = Person.objects.create(
            team=self.team, distinct_ids=[person2_distinct_id], properties={"$some_prop": "different"}
        )
        cohort2 = Cohort.objects.create(
            team=self.team,
            groups=[
                {"properties": [{"type": "person", "key": "$some_prop", "value": "something", "operator": "is_not"}]}
            ],
            name="cohort2",
        )

        filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team)

        prop_clause, prop_clause_params = parse_prop_grouped_clauses(
            property_group=filter.property_groups, has_person_id_joined=False, team_id=self.team.pk
        )
        query = """
        SELECT distinct_id FROM person_distinct_id WHERE team_id = %(team_id)s {prop_clause}
        """.format(
            prop_clause=prop_clause
        )
        # get distinct_id column of result
        result = sync_execute(query, {"team_id": self.team.pk, **prop_clause_params})[0][0]
        self.assertEqual(result, person1_distinct_id)

        # test cohort2 with negation
        filter = Filter(data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}],}, team=self.team)
        prop_clause, prop_clause_params = parse_prop_grouped_clauses(
            property_group=filter.property_groups, has_person_id_joined=False, team_id=self.team.pk
        )
        query = """
        SELECT distinct_id FROM person_distinct_id WHERE team_id = %(team_id)s {prop_clause}
        """.format(
            prop_clause=prop_clause
        )
        # get distinct_id column of result
        result = sync_execute(query, {"team_id": self.team.pk, **prop_clause_params})[0][0]

        self.assertEqual(result, person2_distinct_id)
Example #23
def is_clickhouse_connected() -> bool:
    """
    Check we can perform a super simple Clickhouse query.

    Returns `True` if so, `False` otherwise
    """
    try:
        sync_execute("SELECT 1")
    except ClickhouseError:
        logger.debug("clickhouse_connection_failure", exc_info=True)
        return False

    return True
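A possible call site for the health check above; the view and response shape are assumptions, not part of the source:

from django.http import JsonResponse

def readiness(request):
    # Hypothetical readiness probe: report 503 while ClickHouse is unreachable.
    if not is_clickhouse_connected():
        return JsonResponse({"clickhouse": "unreachable"}, status=503)
    return JsonResponse({"clickhouse": "ok"})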
Example #24
    def test_create_missing_tables(self):
        self.recreate_database(create_tables=True)
        materialize("events", "some_property")
        _, create_table_queries, _ = Command().analyze_cluster_tables()
        sync_execute("DROP TABLE sharded_events SYNC")

        self.assertIn("mat_some_property",
                      create_table_queries["sharded_events"])
        Command().create_missing_tables({"test_host": {"sharded_events"}},
                                        create_table_queries)

        schema = sync_execute("SHOW CREATE TABLE sharded_events")[0][0]
        self.assertIn("mat_some_property", schema)
Example #25
    def _insert_cohortpeople_row(self, team: Team, person_id: UUID, cohort_id: int):
        sync_execute(
            f"""
            INSERT INTO cohortpeople (person_id, cohort_id, team_id, sign)
            VALUES (%(person_id)s, %(cohort_id)s, %(team_id)s, 1)
            """,
            {
                "person_id": str(person_id),
                "cohort_id": cohort_id,
                "team_id": team.pk,
            },
        )
Example #26
def backfill_materialized_columns(table: TableWithProperties,
                                  properties: List[PropertyName],
                                  backfill_period: timedelta,
                                  test_settings=None) -> None:
    """
    Backfills the materialized column after its creation.

    This will require reading and writing a lot of data on the ClickHouse disk.
    """

    if len(properties) == 0:
        return

    updated_table = "sharded_events" if clickhouse_is_replicated(
    ) and table == "events" else table
    # :TRICKY: On cloud, we ON CLUSTER updates to events/sharded_events but not to persons. Why? ¯\_(ツ)_/¯
    execute_on_cluster = f"ON CLUSTER '{CLICKHOUSE_CLUSTER}'" if table == "events" else ""

    materialized_columns = get_materialized_columns(table, use_cache=False)

    # Hack from https://github.com/ClickHouse/ClickHouse/issues/19785
    # Note that for this to work all inserts should list columns explicitly
    # Improve this if https://github.com/ClickHouse/ClickHouse/issues/27730 ever gets resolved
    for property in properties:
        sync_execute(
            f"""
            ALTER TABLE {updated_table}
            {execute_on_cluster}
            MODIFY COLUMN
            {materialized_columns[property]} VARCHAR DEFAULT {TRIM_AND_EXTRACT_PROPERTY}
            """,
            {"property": property},
            settings=test_settings,
        )

    # Kick off mutations which will update clickhouse partitions in the background. This will return immediately
    assignments = ", ".join(
        f"{materialized_columns[property]} = {materialized_columns[property]}"
        for property in properties)

    sync_execute(
        f"""
        ALTER TABLE {updated_table}
        {execute_on_cluster}
        UPDATE {assignments}
        WHERE {"timestamp > %(cutoff)s" if table == "events" else "1 = 1"}
        """,
        {"cutoff": (now() - backfill_period).strftime("%Y-%m-%d")},
        settings=test_settings,
    )
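A hedged usage sketch of the backfill helper above; the property names and period are illustrative:

from datetime import timedelta

# Backfill the materialized columns for two event properties across the most
# recent 90 days of data (the cutoff used in the UPDATE above).
backfill_materialized_columns("events", ["$browser", "$os"], timedelta(days=90))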
Example #27
def test_parse_breakdown_cohort_query(db, team):
    action = Action.objects.create(team=team, name="$pageview")
    ActionStep.objects.create(action=action, event="$pageview")
    cohort1 = Cohort.objects.create(
        team=team,
        groups=[{
            "action_id": action.pk,
            "start_date": datetime(2020, 1, 8, 12, 0, 1)
        }],
        name="cohort1",
    )
    queries, params = _parse_breakdown_cohorts([cohort1])
    assert len(queries) == 1
    sync_execute(queries[0], params)
Example #28
def create_clickhouse_tables(num_tables: int):
    # Reset clickhouse tables to default before running test
    # Mostly so that test runs locally work correctly
    from ee.clickhouse.sql.cohort import CREATE_COHORTPEOPLE_TABLE_SQL
    from ee.clickhouse.sql.dead_letter_queue import (
        DEAD_LETTER_QUEUE_TABLE_MV_SQL,
        DEAD_LETTER_QUEUE_TABLE_SQL,
        KAFKA_DEAD_LETTER_QUEUE_TABLE_SQL,
    )
    from ee.clickhouse.sql.events import DISTRIBUTED_EVENTS_TABLE_SQL, EVENTS_TABLE_SQL, WRITABLE_EVENTS_TABLE_SQL
    from ee.clickhouse.sql.groups import GROUPS_TABLE_SQL
    from ee.clickhouse.sql.person import (
        PERSON_DISTINCT_ID2_TABLE_SQL,
        PERSON_STATIC_COHORT_TABLE_SQL,
        PERSONS_DISTINCT_ID_TABLE_SQL,
        PERSONS_TABLE_SQL,
    )
    from ee.clickhouse.sql.plugin_log_entries import PLUGIN_LOG_ENTRIES_TABLE_SQL
    from ee.clickhouse.sql.session_recording_events import (
        DISTRIBUTED_SESSION_RECORDING_EVENTS_TABLE_SQL,
        SESSION_RECORDING_EVENTS_TABLE_SQL,
        WRITABLE_SESSION_RECORDING_EVENTS_TABLE_SQL,
    )

    # REMEMBER TO ADD ANY NEW CLICKHOUSE TABLES TO THIS ARRAY!
    TABLES_TO_CREATE_DROP = [
        EVENTS_TABLE_SQL(),
        PERSONS_TABLE_SQL(),
        PERSONS_DISTINCT_ID_TABLE_SQL(),
        PERSON_DISTINCT_ID2_TABLE_SQL(),
        PERSON_STATIC_COHORT_TABLE_SQL(),
        SESSION_RECORDING_EVENTS_TABLE_SQL(),
        PLUGIN_LOG_ENTRIES_TABLE_SQL(),
        CREATE_COHORTPEOPLE_TABLE_SQL(),
        KAFKA_DEAD_LETTER_QUEUE_TABLE_SQL(),
        DEAD_LETTER_QUEUE_TABLE_SQL(),
        DEAD_LETTER_QUEUE_TABLE_MV_SQL,
        GROUPS_TABLE_SQL(),
    ]

    if settings.CLICKHOUSE_REPLICATION:
        TABLES_TO_CREATE_DROP.extend([
            DISTRIBUTED_EVENTS_TABLE_SQL(),
            WRITABLE_EVENTS_TABLE_SQL(),
            DISTRIBUTED_SESSION_RECORDING_EVENTS_TABLE_SQL(),
            WRITABLE_SESSION_RECORDING_EVENTS_TABLE_SQL(),
        ])

    if num_tables == len(TABLES_TO_CREATE_DROP):
        return

    for item in TABLES_TO_CREATE_DROP:
        sync_execute(item)
Example #29
    def _query_related_groups(
            self, group_type_index: GroupTypeIndex) -> List[SerializedGroup]:
        if group_type_index == self.group_type_index:
            return []

        group_ids = self._take_first(
            sync_execute(
                f"""
            SELECT DISTINCT $group_{group_type_index} AS group_key
            FROM events e
            {'' if self.is_aggregating_by_groups else self._distinct_ids_join}
            JOIN (
                SELECT group_key
                FROM groups
                WHERE team_id = %(team_id)s AND group_type_index = %(group_type_index)s
                GROUP BY group_key
            ) groups ON $group_{group_type_index} = groups.group_key
            WHERE team_id = %(team_id)s
              AND timestamp > %(after)s
              AND timestamp < %(before)s
              AND group_key != ''
              AND {self._filter_clause}
            ORDER BY group_key
            """,
                {
                    **self._params, "group_type_index": group_type_index
                },
            ))

        _, serialize_groups = get_groups(self.team_id, group_type_index,
                                         group_ids)
        return serialize_groups
Example #30
def set_created_at(apps, schema_editor):

    try:
        from posthog.client import sync_execute
    except ImportError:
        sync_execute = None  # type: ignore

    EventDefinition = apps.get_model("posthog", "EventDefinition")
    for instance in EventDefinition.objects.filter(created_at=None):
        created_at = None
        result = None
        if sync_execute:
            result = sync_execute(
                "SELECT timestamp FROM events where team_id=%(team_id)s AND event=%(event)s"
                " order by timestamp limit 1",
                {
                    "team_id": instance.team.pk,
                    "event": instance.name,
                },
            )
        if result:
            created_at = result[0][0]

        if created_at:
            instance.created_at = created_at
            instance.save()