Exemple #1
0
def status_report(*, dry_run: bool = False) -> Dict[str, Any]:
    """Build the instance status report for the previous week and capture it.

    The report contains the PostHog version, the ``DEPLOYMENT`` environment
    variable, the reporting period, the users whose ``last_login`` is on or
    after the period start, the size of two tables and a set of per-team
    counts.

    Args:
        dry_run: Forwarded unchanged to every ``capture_event`` call.

    Returns:
        The assembled report dictionary.
    """
    period_start, period_end = get_previous_week()
    start_iso = period_start.isoformat()
    end_iso = period_end.isoformat()

    report: Dict[str, Any] = {
        "posthog_version": VERSION,
        "deployment": os.getenv("DEPLOYMENT", "unknown"),
        "period": {"start_inclusive": start_iso, "end_inclusive": end_iso},
    }

    logged_in_users = []
    for user in User.objects.filter(last_login__gte=period_start):
        entry: Dict[str, Any] = {"id": user.id, "distinct_id": user.distinct_id}
        if not user.anonymize_data:
            # Name and email are only reported when anonymize_data is false.
            entry["first_name"] = user.first_name
            entry["email"] = user.email
        logged_in_users.append(entry)
    report["users_who_logged_in"] = logged_in_users

    report["teams"] = {}
    report["table_sizes"] = {
        table: fetch_table_size(table) for table in ("posthog_event", "posthog_sessionrecordingevent")
    }

    for team in Team.objects.all():
        # A failure for one team is captured as its own event and does not
        # stop the remaining teams from being processed.
        try:
            team_events = Event.objects.filter(team_id=team.id)
            team_persons = Person.objects.filter(team_id=team.id)
            query_params = (team.id, start_iso, end_iso)

            report["teams"][team.id] = {
                "events_count_total": team_events.count(),
                "events_count_new_in_period": team_events.filter(
                    timestamp__gte=period_start, timestamp__lte=period_end
                ).count(),
                "persons_count_total": team_persons.count(),
                "persons_count_new_in_period": team_persons.filter(
                    created_at__gte=period_start, created_at__lte=period_end
                ).count(),
                "persons_count_active_in_period": fetch_persons_count_active_in_period(query_params),
                "events_count_by_lib": fetch_event_counts_by_lib(query_params),
                "events_count_by_name": fetch_events_count_by_name(query_params),
            }
        except Exception as exc:
            capture_event("instance status report failure", {"error": str(exc)}, dry_run=dry_run)

    capture_event("instance status report", report, dry_run=dry_run)
    return report
def status_report(*, dry_run: bool = False) -> Dict[str, Any]:
    """Build the instance status report for the previous week and capture it.

    On top of version, deployment, period and site URL, the report carries
    the helm info, the users whose ``last_login`` is on or after the period
    start, two table sizes, plugin name counts (installed and enabled) and,
    per team, event/person counts plus dashboard and feature flag counts.

    Args:
        dry_run: Forwarded unchanged to every ``capture_event`` call.

    Returns:
        The assembled report dictionary.
    """
    period_start, period_end = get_previous_week()
    start_iso = period_start.isoformat()
    end_iso = period_end.isoformat()

    report: Dict[str, Any] = {
        "posthog_version": VERSION,
        "deployment": os.getenv("DEPLOYMENT", "unknown"),
        "period": {"start_inclusive": start_iso, "end_inclusive": end_iso},
        "site_url": os.getenv("SITE_URL", "unknown"),
    }
    report["helm"] = get_helm_info_env()

    logged_in_users = []
    for user in User.objects.filter(last_login__gte=period_start):
        entry: Dict[str, Any] = {"id": user.id, "distinct_id": user.distinct_id}
        if not user.anonymize_data:
            # Name and email are only reported when anonymize_data is false.
            entry["first_name"] = user.first_name
            entry["email"] = user.email
        logged_in_users.append(entry)
    report["users_who_logged_in"] = logged_in_users

    report["teams"] = {}
    report["table_sizes"] = {
        table: fetch_table_size(table) for table in ("posthog_event", "posthog_sessionrecordingevent")
    }

    # Tally plugin names in a single pass: every config counts as installed,
    # and additionally as enabled when its ``enabled`` flag is set.
    installed: Counter = Counter()
    enabled: Counter = Counter()
    for config in PluginConfig.objects.select_related("plugin").all():
        plugin_name = config.plugin.name
        installed[plugin_name] += 1
        if config.enabled:
            enabled[plugin_name] += 1
    report["plugins_installed"] = installed
    report["plugins_enabled"] = enabled

    for team in Team.objects.all():
        # A failure for one team is captured as its own event and does not
        # stop the remaining teams from being processed.
        try:
            team_events = Event.objects.filter(team_id=team.id)
            team_persons = Person.objects.filter(team_id=team.id)
            query_params = (team.id, start_iso, end_iso)

            summary: Dict[str, Any] = {
                "events_count_total": team_events.count(),
                "events_count_new_in_period": team_events.filter(
                    timestamp__gte=period_start, timestamp__lte=period_end
                ).count(),
                "persons_count_total": team_persons.count(),
                "persons_count_new_in_period": team_persons.filter(
                    created_at__gte=period_start, created_at__lte=period_end
                ).count(),
                "persons_count_active_in_period": fetch_persons_count_active_in_period(query_params),
                "events_count_by_lib": fetch_event_counts_by_lib(query_params),
                "events_count_by_name": fetch_events_count_by_name(query_params),
            }

            # Dashboards (rows flagged deleted are left out)
            dashboards = Dashboard.objects.filter(team=team).exclude(deleted=True)
            summary["dashboards_count"] = dashboards.count()
            summary["dashboards_template_count"] = dashboards.filter(creation_mode="template").count()
            summary["dashboards_shared_count"] = dashboards.filter(is_shared=True).count()
            summary["dashboards_tagged_count"] = dashboards.exclude(tags=[]).count()

            # Feature flags (rows flagged deleted are left out)
            flags = FeatureFlag.objects.filter(team=team).exclude(deleted=True)
            summary["ff_count"] = flags.count()
            summary["ff_active_count"] = flags.filter(active=True).count()

            report["teams"][team.id] = summary
        except Exception as exc:
            capture_event("instance status report failure", {"error": str(exc)}, dry_run=dry_run)

    capture_event("instance status report", report, dry_run=dry_run)
    return report
Exemple #3
0
def status_report(*, dry_run: bool = False) -> Dict[str, Any]:
    """Build the instance status report for the previous week and capture it.

    The report contains instance metadata (versions, deployment, realm, site
    URL, license keys, helm info), the active users whose ``last_login`` is
    on or after the period start, two table sizes, plugin name counts, one
    entry per team and an ``instance_usage_summary`` that accumulates a few
    of the per-team counts across all processed teams.

    Teams whose organization has ``for_internal_metrics=True`` are skipped.

    Args:
        dry_run: Forwarded unchanged to every ``capture_event`` call.

    Returns:
        The assembled report dictionary.
    """
    period_start, period_end = get_previous_week()
    report: Dict[str, Any] = {
        "posthog_version": VERSION,
        "clickhouse_version": str(version_requirement.get_clickhouse_version()),
        "deployment": os.getenv("DEPLOYMENT", "unknown"),
        "realm": get_instance_realm(),
        "period": {
            "start_inclusive": period_start.isoformat(),
            "end_inclusive": period_end.isoformat(),
        },
        "site_url": os.getenv("SITE_URL", "unknown"),
        "license_keys": get_instance_licenses(),
    }

    report["helm"] = get_helm_info_env()

    report["users_who_logged_in"] = [
        {"id": user.id, "distinct_id": user.distinct_id}
        if user.anonymize_data
        else {
            "id": user.id,
            "distinct_id": user.distinct_id,
            "first_name": user.first_name,
            "email": user.email,
        }
        for user in User.objects.filter(is_active=True, last_login__gte=period_start)
    ]
    report["teams"] = {}
    report["table_sizes"] = {
        "posthog_event": fetch_table_size("posthog_event"),
        "posthog_sessionrecordingevent": fetch_table_size("posthog_sessionrecordingevent"),
    }

    plugin_configs = PluginConfig.objects.select_related("plugin").all()

    report["plugins_installed"] = Counter(plugin_config.plugin.name for plugin_config in plugin_configs)
    report["plugins_enabled"] = Counter(
        plugin_config.plugin.name for plugin_config in plugin_configs if plugin_config.enabled
    )

    # Mixed value types: integer counters plus the boolean "using_groups".
    instance_usage_summary: Dict[str, Any] = {
        "events_count_new_in_period": 0,
        "persons_count_new_in_period": 0,
        "persons_count_total": 0,
        "events_count_total": 0,
        "dashboards_count": 0,
        "ff_count": 0,
        "using_groups": False,
    }

    for team in Team.objects.exclude(organization__for_internal_metrics=True):
        # A failure for one team is captured as its own event and does not
        # stop the remaining teams from being processed.
        try:
            team_report: Dict[str, Any] = {}
            # pull events stats from clickhouse
            # NOTE(review): these imports stay inside the ``try`` so that an
            # import failure is captured like any other per-team error;
            # presumably that is intentional - confirm before hoisting them.
            from ee.clickhouse.models.event import (
                get_event_count_for_team,
                get_event_count_for_team_and_period,
                get_events_count_for_team_by_client_lib,
                get_events_count_for_team_by_event_type,
            )
            from ee.clickhouse.models.person import (
                count_duplicate_distinct_ids_for_team,
                count_total_persons_with_multiple_ids,
            )

            team_event_count = get_event_count_for_team(team.id)
            instance_usage_summary["events_count_total"] += team_event_count
            team_report["events_count_total"] = team_event_count

            team_events_in_period_count = get_event_count_for_team_and_period(team.id, period_start, period_end)
            team_report["events_count_new_in_period"] = team_events_in_period_count
            instance_usage_summary["events_count_new_in_period"] += team_events_in_period_count

            team_report["events_count_by_lib"] = get_events_count_for_team_by_client_lib(
                team.id, period_start, period_end
            )
            team_report["events_count_by_name"] = get_events_count_for_team_by_event_type(
                team.id, period_start, period_end
            )

            team_report["duplicate_distinct_ids"] = count_duplicate_distinct_ids_for_team(team.id)
            team_report["multiple_ids_per_person"] = count_total_persons_with_multiple_ids(team.id)
            team_report["group_types_count"] = GroupTypeMapping.objects.filter(team_id=team.id).count()

            if team_report["group_types_count"] > 0:
                # One team with at least one group type marks the whole instance.
                instance_usage_summary["using_groups"] = True

            # pull person stats and the rest here from Postgres always
            persons_considered_total = Person.objects.filter(team_id=team.id)
            persons_considered_total_new_in_period = persons_considered_total.filter(
                created_at__gte=period_start,
                created_at__lte=period_end,
            )
            team_report["persons_count_total"] = persons_considered_total.count()
            instance_usage_summary["persons_count_total"] += team_report["persons_count_total"]

            team_report["persons_count_new_in_period"] = persons_considered_total_new_in_period.count()
            instance_usage_summary["persons_count_new_in_period"] += team_report["persons_count_new_in_period"]

            # Dashboards
            team_dashboards = Dashboard.objects.filter(team=team).exclude(deleted=True)
            team_report["dashboards_count"] = team_dashboards.count()
            instance_usage_summary["dashboards_count"] += team_report["dashboards_count"]
            team_report["dashboards_template_count"] = team_dashboards.filter(creation_mode="template").count()
            team_report["dashboards_shared_count"] = team_dashboards.filter(is_shared=True).count()
            team_report["dashboards_tagged_count"] = team_dashboards.exclude(tagged_items__isnull=True).count()

            # Feature Flags
            feature_flags = FeatureFlag.objects.filter(team=team).exclude(deleted=True)
            team_report["ff_count"] = feature_flags.count()
            instance_usage_summary["ff_count"] += team_report["ff_count"]
            team_report["ff_active_count"] = feature_flags.filter(active=True).count()

            report["teams"][team.id] = team_report
        except Exception as err:
            capture_event("instance status report failure", {"error": str(err)}, dry_run=dry_run)

    report["instance_usage_summary"] = instance_usage_summary
    capture_event("instance status report", report, dry_run=dry_run)
    return report
Exemple #4
0
def send_weekly_email_report() -> None:
    """
    Sends the weekly email report for every team to the members of the
    team's organization.

    Does nothing (apart from logging) when ``is_email_available()`` is false.
    Teams without any active user in the reporting period are skipped.
    """

    if not is_email_available():
        logger.info("Skipping send_weekly_email_report because email is not properly configured")
        return

    period_start, period_end = get_previous_week()

    one_week = datetime.timedelta(7)
    last_week_start: datetime.datetime = period_start - one_week
    last_week_end: datetime.datetime = period_end - one_week

    for team in Team.objects.all():

        event_data_set = Event.objects.filter(
            team=team,
            timestamp__gte=period_start,
            timestamp__lte=period_end,
        )

        active_users = PersonDistinctId.objects.filter(
            distinct_id__in=event_data_set.values("distinct_id").distinct(),
        ).distinct()
        active_users_count: int = active_users.count()

        if active_users_count == 0:
            # TODO: Send an email prompting fix to no active users
            continue

        last_week_users = PersonDistinctId.objects.filter(
            distinct_id__in=Event.objects.filter(
                team=team,
                timestamp__gte=last_week_start,
                timestamp__lte=last_week_end,
            )
            .values("distinct_id")
            .distinct(),
        ).distinct()
        last_week_users_count: int = last_week_users.count()

        two_weeks_ago_users = PersonDistinctId.objects.filter(
            distinct_id__in=Event.objects.filter(
                team=team,
                timestamp__gte=last_week_start - one_week,
                timestamp__lte=last_week_end - one_week,
            )
            .values("distinct_id")
            .distinct(),
        ).distinct()  # used to compute delta in churned users
        two_weeks_ago_users_count: int = two_weeks_ago_users.count()

        not_last_week_users = PersonDistinctId.objects.filter(
            pk__in=active_users.difference(last_week_users).values_list("pk", flat=True)
        )  # users that were present this week but not last week

        churned_count = last_week_users.difference(active_users).count()
        churned_ratio: Optional[float] = (
            churned_count / last_week_users_count if last_week_users_count > 0 else None
        )
        last_week_churn_ratio: Optional[float] = (
            two_weeks_ago_users.difference(last_week_users).count() / two_weeks_ago_users_count
            if two_weeks_ago_users_count > 0
            else None
        )
        # Both ratios must be usable: churned_ratio is None when last week had
        # no users, and that can coincide with a non-zero last_week_churn_ratio
        # (users two weeks ago, none last week). Dividing None would raise
        # TypeError and abort the report for every remaining team.
        churned_delta: Optional[float] = (
            churned_ratio / last_week_churn_ratio - 1
            if churned_ratio is not None and last_week_churn_ratio
            else None
        )

        message = EmailMessage(
            f"PostHog weekly report for {period_start.strftime('%b %d, %Y')} to {period_end.strftime('%b %d')}",
            "weekly_report",
            {
                "preheader": f"Your PostHog weekly report is ready! Your team had {compact_number(active_users_count)} active users last week! 🎉",
                "team": team.name,
                "period_start": period_start,
                "period_end": period_end,
                "active_users": active_users_count,
                "active_users_delta": (
                    active_users_count / last_week_users_count - 1 if last_week_users_count > 0 else None
                ),
                # Shares of this period's active users; active_users_count is
                # guaranteed to be non-zero by the guard above.
                "user_distribution": {
                    "new": not_last_week_users.filter(person__created_at__gte=period_start).count()
                    / active_users_count,
                    "retained": active_users.intersection(last_week_users).count() / active_users_count,
                    "resurrected": not_last_week_users.filter(person__created_at__lt=period_start).count()
                    / active_users_count,
                },
                "churned_users": {"abs": churned_count, "ratio": churned_ratio, "delta": churned_delta},
            },
        )

        for user in team.organization.members.all():
            # TODO: Skip "unsubscribed" users
            message.add_recipient(user.email, user.first_name)

        # TODO: Schedule retry on failed attempt
        message.send()
Exemple #5
0
def _send_weekly_email_report_for_team(team_id: int) -> None:
    """
    Sends the weekly email report for one team to the members of the team's
    organization.

    Returns without sending anything when the team had no active users in
    the reporting period. ``Team.objects.get`` is used for the lookup, so an
    unknown ``team_id`` propagates that call's exception.
    """

    period_start, period_end = get_previous_week()
    one_week = datetime.timedelta(7)
    last_week_start: datetime.datetime = period_start - one_week
    last_week_end: datetime.datetime = period_end - one_week

    campaign_key: str = f"weekly_report_for_team_{team_id}_on_{period_start.strftime('%Y-%m-%d')}"

    team = Team.objects.get(pk=team_id)

    event_data_set = Event.objects.filter(
        team=team,
        timestamp__gte=period_start,
        timestamp__lte=period_end,
    )

    active_users = PersonDistinctId.objects.filter(
        distinct_id__in=event_data_set.values("distinct_id").distinct(),
    ).distinct()
    active_users_count: int = active_users.count()

    if active_users_count == 0:
        # TODO: Send an email prompting fix to no active users
        return

    last_week_users = PersonDistinctId.objects.filter(
        distinct_id__in=Event.objects.filter(
            team=team,
            timestamp__gte=last_week_start,
            timestamp__lte=last_week_end,
        )
        .values("distinct_id")
        .distinct(),
    ).distinct()
    last_week_users_count: int = last_week_users.count()

    two_weeks_ago_users = PersonDistinctId.objects.filter(
        distinct_id__in=Event.objects.filter(
            team=team,
            timestamp__gte=last_week_start - one_week,
            timestamp__lte=last_week_end - one_week,
        )
        .values("distinct_id")
        .distinct(),
    ).distinct()  # used to compute delta in churned users
    two_weeks_ago_users_count: int = two_weeks_ago_users.count()

    not_last_week_users = PersonDistinctId.objects.filter(
        pk__in=active_users.difference(last_week_users).values_list("pk", flat=True)
    )  # users that were present this week but not last week

    churned_count = last_week_users.difference(active_users).count()
    churned_ratio: Optional[float] = (
        churned_count / last_week_users_count if last_week_users_count > 0 else None
    )
    last_week_churn_ratio: Optional[float] = (
        two_weeks_ago_users.difference(last_week_users).count() / two_weeks_ago_users_count
        if two_weeks_ago_users_count > 0
        else None
    )
    # Both ratios must be usable: churned_ratio is None when last week had no
    # users, and that can coincide with a non-zero last_week_churn_ratio
    # (users two weeks ago, none last week). Dividing None would raise
    # TypeError before the email is built.
    churned_delta: Optional[float] = (
        churned_ratio / last_week_churn_ratio - 1
        if churned_ratio is not None and last_week_churn_ratio
        else None
    )

    message = EmailMessage(
        campaign_key=campaign_key,
        subject=f"PostHog weekly report for {period_start.strftime('%b %d, %Y')} to {period_end.strftime('%b %d')}",
        template_name="weekly_report",
        template_context={
            "preheader": f"Your PostHog weekly report is ready! Your team had {compact_number(active_users_count)} active users last week! 🎉",
            "team": team.name,
            "period_start": period_start,
            "period_end": period_end,
            "active_users": active_users_count,
            "active_users_delta": (
                active_users_count / last_week_users_count - 1 if last_week_users_count > 0 else None
            ),
            # Shares of this period's active users; active_users_count is
            # guaranteed to be non-zero by the early return above.
            "user_distribution": {
                "new": not_last_week_users.filter(person__created_at__gte=period_start).count()
                / active_users_count,
                "retained": active_users.intersection(last_week_users).count() / active_users_count,
                "resurrected": not_last_week_users.filter(person__created_at__lt=period_start).count()
                / active_users_count,
            },
            "churned_users": {"abs": churned_count, "ratio": churned_ratio, "delta": churned_delta},
        },
    )

    for user in team.organization.members.all():
        # TODO: Skip "unsubscribed" users
        message.add_recipient(email=user.email, name=user.first_name)

    message.send()
Exemple #6
0
def status_report() -> None:
    """Build the instance status report for the previous week and send it.

    The report contains the PostHog version, the reporting period, the users
    whose ``last_login`` is on or after the period start and, per team,
    event/person counts plus event counts grouped by ``$lib`` and by event
    name. It is sent through ``posthoganalytics.capture`` under the id
    returned by ``get_machine_id()``.
    """
    period_start, period_end = get_previous_week()
    report: Dict[str, Any] = {
        "posthog_version": VERSION,
        "period": {
            "start_inclusive": period_start.isoformat(),
            "end_inclusive": period_end.isoformat(),
        },
    }
    report["users_who_logged_in"] = [
        {"id": user.id, "distinct_id": user.distinct_id}
        if user.anonymize_data
        else {
            "id": user.id,
            "distinct_id": user.distinct_id,
            "first_name": user.first_name,
            "email": user.email,
        }
        for user in User.objects.filter(last_login__gte=period_start)
    ]
    report["teams"] = {}
    for team in Team.objects.all():
        team_report: Dict[str, Any] = {}
        events_considered_total = Event.objects.filter(team_id=team.id)
        events_considered_new_in_period = events_considered_total.filter(
            created_at__gte=period_start,
            created_at__lte=period_end,
        )
        # The persons_* counts must be taken from Person rows; querying Event
        # here would just repeat the event counts under a different key.
        persons_considered_total = Person.objects.filter(team_id=team.id)
        persons_considered_total_new_in_period = persons_considered_total.filter(
            created_at__gte=period_start,
            created_at__lte=period_end,
        )
        team_report["events_count_total"] = events_considered_total.count()
        team_report["events_count_new_in_period"] = events_considered_new_in_period.count()
        team_report["persons_count_total"] = persons_considered_total.count()
        team_report["persons_count_new_in_period"] = persons_considered_total_new_in_period.count()

        # The period dict only has "start_inclusive" and "end_inclusive", so
        # the raw queries use the inclusive end with "<=", matching the
        # created_at__lte ORM filters above.
        params = (
            team.id,
            report["period"]["start_inclusive"],
            report["period"]["end_inclusive"],
        )

        with connection.cursor() as cursor:
            cursor.execute(
                sql.SQL("""
                SELECT COUNT(DISTINCT person_id) as persons_count
                FROM posthog_event JOIN posthog_persondistinctid ON (posthog_event.distinct_id = posthog_persondistinctid.distinct_id) WHERE posthog_event.team_id = %s AND posthog_event.created_at >= %s AND posthog_event.created_at <= %s
            """),
                params,
            )
            team_report["persons_count_active_in_period"] = cursor.fetchone()[0]
            cursor.execute(
                sql.SQL("""
                SELECT properties->>'$lib' as lib, COUNT(*) as count
                FROM posthog_event WHERE team_id = %s AND created_at >= %s AND created_at <= %s GROUP BY lib
            """),
                params,
            )
            team_report["events_count_by_lib"] = {
                result.lib: result.count for result in namedtuplefetchall(cursor)
            }
            cursor.execute(
                sql.SQL("""
                SELECT event as name, COUNT(*) as count
                FROM posthog_event WHERE team_id = %s AND created_at >= %s AND created_at <= %s GROUP BY name
            """),
                params,
            )
            team_report["events_count_by_name"] = {
                result.name: result.count for result in namedtuplefetchall(cursor)
            }
        report["teams"][team.id] = team_report

    # NOTE(review): the api_key assignment is not reverted afterwards, so it
    # stays in effect on the posthoganalytics module for later callers.
    posthoganalytics.api_key = "sTMFPsFhdP1Ssg"
    disabled = posthoganalytics.disabled
    # Temporarily force-enable the client so the report goes out, and restore
    # the previous setting even if capturing raises.
    posthoganalytics.disabled = False
    try:
        posthoganalytics.capture(get_machine_id(), "instance status report", report)
    finally:
        posthoganalytics.disabled = disabled