예제 #1
0
        def test_pagination(self):
            """Check that the event list is paginated.

            150 events are created for distinct id "1". The first request
            must return 100 results plus a ``next`` link carrying a
            ``before=`` parameter, and following that link must return the
            remaining 50.
            """
            person_factory(team=self.team, distinct_ids=["1"])

            # Every event gets its own timestamp: one day and one second
            # later than the previous one.
            first_timestamp = datetime(2019, 1, 1, 12, 0, 0)
            for offset in range(150):
                event_factory(
                    team=self.team,
                    event="some event",
                    distinct_id="1",
                    timestamp=first_timestamp
                    + relativedelta(days=offset, seconds=offset),
                )

            first_page = self.client.get("/api/event/?distinct_id=1").json()
            self.assertEqual(len(first_page["results"]), 100)
            self.assertIn(
                "http://testserver/api/event/?distinct_id=1&before=",
                first_page["next"],
            )

            second_page = self.client.get(first_page["next"]).json()
            from posthog.ee import check_ee_enabled

            if check_ee_enabled():
                from ee.clickhouse.client import sync_execute

                # With EE enabled, also confirm that all 150 rows are in the
                # ClickHouse events table.
                stored_rows = sync_execute("select count(*) from events")[0][0]
                self.assertEqual(stored_rows, 150)

            self.assertEqual(len(second_page["results"]), 50)
예제 #2
0
파일: demo.py 프로젝트: zhang1998/posthog
def demo(request):
    """Render the demo page for the requesting user's demo team.

    The team named ``TEAM_NAME`` is looked up in the user's organization.
    When it does not exist it is created and seeded with anonymous users and
    a funnel, then recalculated. The team then becomes the user's current
    team, and ``$pageview`` is added to its event names if missing. With EE
    enabled, ClickHouse demo users are created when the team has no events
    there yet.
    """
    current_user = request.user
    org = current_user.organization
    try:
        team = org.teams.get(name=TEAM_NAME)
    except Team.DoesNotExist:
        # First visit: build the demo team and its seed data.
        team = Team.objects.create_with_data(
            organization=org,
            name=TEAM_NAME,
            ingested_event=True,
            completed_snippet_onboarding=True,
        )
        _create_anonymous_users(
            team=team, base_url=request.build_absolute_uri("/demo")
        )
        _create_funnel(team=team, base_url=request.build_absolute_uri("/demo"))
        _recalculate(team=team)

    current_user.current_team = team
    current_user.save()

    if "$pageview" not in team.event_names:
        team.event_names.append("$pageview")
        team.save()

    if check_ee_enabled():
        from ee.clickhouse.demo import create_anonymous_users_ch
        from ee.clickhouse.models.event import get_events_by_team

        # Seed ClickHouse only when it holds no events for this team yet.
        if not get_events_by_team(team_id=team.pk):
            create_anonymous_users_ch(
                team=team, base_url=request.build_absolute_uri("/demo")
            )

    template_context = {"api_token": team.api_token}
    return render_template("demo.html", request=request, context=template_context)
예제 #3
0
def calculate_actions_ch(action: Action) -> None:
    """Populate the ClickHouse action/event table for ``action``.

    Does nothing unless EE is enabled. The update is best-effort: any
    ordinary failure (including a failing import of the EE module) is logged
    with its traceback and swallowed, so the caller is never interrupted.

    Args:
        action: The action whose matching events should be recomputed.
    """
    if not check_ee_enabled():
        return

    try:
        from ee.clickhouse.models.action import populate_action_event_table

        populate_action_event_table(action)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt still propagate; ``logger.exception`` keeps the
        # traceback that the plain error message used to discard.
        logger.exception("Could not update clickhouse tables")
예제 #4
0
def calculate_cohorts_ch(cohort: Cohort) -> None:
    """Populate the ClickHouse cohort/person table for ``cohort``.

    Does nothing unless EE is enabled. The update is best-effort: any
    ordinary failure (including a failing import of the EE module) is logged
    with its traceback and swallowed, so the caller is never interrupted.

    Args:
        cohort: The cohort whose membership should be recomputed.
    """
    if not check_ee_enabled():
        return

    try:
        from ee.clickhouse.models.cohort import populate_cohort_person_table

        populate_cohort_person_table(cohort)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt still propagate; ``logger.exception`` keeps the
        # traceback that the plain error message used to discard.
        logger.exception("Could not update clickhouse cohort tables")
예제 #5
0
파일: celery.py 프로젝트: sbauch/posthog
def clickhouse_lag():
    """Send the ClickHouse events-table lag to statsd as a gauge.

    The lag is ``now() - max(_timestamp)`` as computed by ClickHouse over the
    ``events`` table. Nothing happens unless EE is both enabled and
    available.
    """
    if not (check_ee_enabled() and settings.EE_AVAILABLE):
        return

    from ee.clickhouse.client import sync_execute

    lag_query = """select max(_timestamp) observed_ts, now() now_ts, now() - max(_timestamp) as lag from events;"""
    first_row = sync_execute(lag_query)[0]
    # Column order is (observed_ts, now_ts, lag); only the lag is reported.
    lag_value = first_row[2]

    gauge = statsd.Gauge("%s_posthog_celery" % (settings.STATSD_PREFIX, ))
    gauge.send("clickhouse_even_table_lag_seconds", lag_value)
예제 #6
0
파일: celery.py 프로젝트: sbauch/posthog
def setup_periodic_tasks(sender, **kwargs):
    """Register the recurring tasks on ``sender``.

    Which tasks are registered depends on ``settings.DEBUG``, the optional
    ``MULTI_TENANCY`` setting, the ``SESSION_RECORDING_RETENTION_CRONJOB``
    environment variable, whether EE is enabled, and
    ``settings.ASYNC_EVENT_ACTION_MAPPING``.
    """
    register = sender.add_periodic_task

    if not settings.DEBUG:
        register(
            1.0,
            redis_celery_queue_depth.s(),
            name="1 sec queue probe",
            priority=0,
        )

    # Heartbeat every 10sec to make sure the worker is alive
    register(10.0, redis_heartbeat.s(), name="10 sec heartbeat", priority=0)

    # Update events table partitions twice a week (Monday and Friday, 00:00).
    partition_schedule = crontab(day_of_week="mon,fri", hour=0, minute=0)
    register(partition_schedule, update_event_partitions.s())

    multi_tenancy = getattr(settings, "MULTI_TENANCY", False)

    if multi_tenancy or os.environ.get(
        "SESSION_RECORDING_RETENTION_CRONJOB", False
    ):
        # Session recording retention runs every 12 hours, on the hour.
        register(
            crontab(minute=0, hour="*/12"),
            run_session_recording_retention.s(),
        )

    # send weekly status report on non-PostHog Cloud instances
    if not multi_tenancy:
        register(
            crontab(day_of_week="mon", hour=0, minute=0),
            status_report.s(),
        )

    # send weekly email report (~ 8:00 SF / 16:00 UK / 17:00 EU)
    register(
        crontab(day_of_week="mon", hour=15, minute=0),
        send_weekly_email_report.s(),
    )

    register(
        crontab(day_of_week="fri", hour=0, minute=0),
        clean_stale_partials.s(),
    )

    if check_ee_enabled():
        # ee enabled scheduled tasks
        register(120, clickhouse_lag.s(), name="clickhouse event table lag")
    else:
        register(15 * 60, calculate_cohort.s(), name="debug")
        register(600, check_cached_items.s(), name="check dashboard items")

    if settings.ASYNC_EVENT_ACTION_MAPPING:
        # The task expires after one interval, i.e. the same number of
        # seconds as the schedule period.
        mapping_interval_seconds = 60 * ACTION_EVENT_MAPPING_INTERVAL_MINUTES
        register(
            mapping_interval_seconds,
            calculate_event_action_mappings.s(),
            name="calculate event action mappings",
            expires=mapping_interval_seconds,
        )
예제 #7
0
def demo(request):
    """Render the demo page for the requesting user's team.

    When the team has no events yet, it is seeded with anonymous users and a
    funnel and then recalculated. ``$pageview`` is added to the team's event
    names if missing. With EE enabled, ClickHouse demo users are created when
    the team has no events there yet.
    """
    team = request.user.team

    team_has_events = Event.objects.filter(team=team).exists()
    if not team_has_events:
        _create_anonymous_users(
            team=team, base_url=request.build_absolute_uri("/demo")
        )
        _create_funnel(team=team, base_url=request.build_absolute_uri("/demo"))
        _recalculate(team=team)

    if "$pageview" not in team.event_names:
        team.event_names.append("$pageview")
        team.save()

    if check_ee_enabled():
        from ee.clickhouse.demo import create_anonymous_users_ch
        from ee.clickhouse.models.event import get_events_by_team

        # Seed ClickHouse only when it holds no events for this team yet.
        if not get_events_by_team(team_id=team.pk):
            create_anonymous_users_ch(
                team=team, base_url=request.build_absolute_uri("/demo")
            )

    template_context = {"api_token": team.api_token}
    return render_template("demo.html", request=request, context=template_context)
예제 #8
0
파일: person.py 프로젝트: sbauch/posthog
    INSERT_PERSON_SQL,
    PERSON_DISTINCT_ID_EXISTS_SQL,
    UPDATE_PERSON_ATTACHED_DISTINCT_ID,
    UPDATE_PERSON_IS_IDENTIFIED,
    UPDATE_PERSON_PROPERTIES,
)
from ee.kafka_client.client import ClickhouseProducer
from ee.kafka_client.topics import KAFKA_PERSON, KAFKA_PERSON_UNIQUE_ID
from posthog import settings
from posthog.ee import check_ee_enabled
from posthog.models.filter import Filter
from posthog.models.person import Person, PersonDistinctId
from posthog.models.team import Team
from posthog.models.utils import UUIDT

if settings.EE_AVAILABLE and check_ee_enabled():

    @receiver(post_save, sender=Person)
    def person_created(sender, instance: Person, created, **kwargs):
        """Forward a saved ``Person`` to ``create_person``.

        NOTE(review): ``created`` is not consulted, so this runs for every
        ``post_save`` of a Person, not only the first one. Confirm that is
        intended.
        """
        person_fields = {
            "team_id": instance.team.pk,
            "properties": instance.properties,
            "uuid": str(instance.uuid),
            "is_identified": instance.is_identified,
        }
        create_person(**person_fields)

    @receiver(post_save, sender=PersonDistinctId)
    def person_distinct_id_created(sender, instance: PersonDistinctId, created,
                                   **kwargs):
        create_person_distinct_id(instance.pk, instance.team.pk,
                                  instance.distinct_id,
예제 #9
0
    store_names_and_properties(team=team, event=event, properties=properties)

    # # determine create events
    create_event(
        event_uuid=event_uuid,
        event=event,
        properties=properties,
        timestamp=timestamp,
        team=team,
        distinct_id=distinct_id,
        elements=elements_list,
    )


if check_ee_enabled():

    @shared_task
    def process_event_ee(
        distinct_id: str, ip: str, site_url: str, data: dict, team_id: int, now: str, sent_at: Optional[str],
    ) -> None:
        properties = data.get("properties", data.get("$set", {}))
        person_uuid = UUIDT()
        event_uuid = UUIDT()
        ts = handle_timestamp(data, now, sent_at)

        _capture_ee(
            event_uuid=event_uuid,
            person_uuid=person_uuid,
            ip=ip,
            site_url=site_url,
예제 #10
0
def get_event(request):
    """Validate a capture request and queue each contained event.

    The payload may be a single event (dict), a list of events, or a dict
    with a non-empty ``batch`` list. The token is taken from the payload or
    request first, with a personal API key as the fallback, and is resolved
    to a team. Each event must yield a distinct id and contain an ``event``
    key.

    Events are dispatched in order. If a later event in a batch fails
    validation, the events before it have already been queued when the 400
    response is returned.

    Returns:
        A CORS-wrapped ``JsonResponse``: ``{"status": 1}`` on success, or a
        400 with ``{"code": "validation", "message": ...}`` (plus ``item``
        for per-event failures).
    """

    def _reject(message, **extra):
        # Single place that builds the 400 validation response. Key order is
        # code, message, then any extras (currently only ``item``).
        body = {"code": "validation", "message": message}
        body.update(extra)
        return cors_response(request, JsonResponse(body, status=400))

    now = timezone.now()
    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return _reject(
            "Malformed request data. Make sure you're sending valid JSON.")
    if not data:
        return _reject(
            "No data found. Make sure to use a POST request when sending the payload in the body of the request."
        )
    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    is_personal_api_key = False
    if not token:
        # Fall back to a personal API key when no project token was sent.
        token = PersonalAPIKeyAuthentication.find_key(
            request, data_from_request["body"],
            data if isinstance(data, dict) else None)
        is_personal_api_key = True
    if not token:
        return _reject(
            "Neither api_key nor personal_api_key set. You can find your project API key in PostHog project settings."
        )

    team = Team.objects.get_team_from_token(token, is_personal_api_key)
    if team is None:
        return _reject(
            "Project or personal API key invalid. You can find your project API key in PostHog project settings."
        )

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            # Cannot fail (the value was just checked to be truthy);
            # presumably kept to narrow the type for static checkers.
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    events = data if isinstance(data, list) else [data]

    # These depend only on the request, so compute them once instead of once
    # (or twice) per event.
    ip = get_ip_address(request)
    site_url = request.build_absolute_uri("/")[:-1]
    ee_enabled = check_ee_enabled()

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return _reject(
                "You need to set user distinct ID field `distinct_id`.",
                item=event)
        if "event" not in event:
            return _reject("You need to set event name field `event`.",
                           item=event)

        event_kwargs = dict(
            distinct_id=distinct_id,
            ip=ip,
            site_url=site_url,
            data=event,
            team_id=team.id,
            now=now,
            sent_at=sent_at,
        )
        if ee_enabled:
            process_event_ee.delay(**event_kwargs)
            # log the event to kafka write ahead log for processing
            log_event(**event_kwargs)
        else:
            process_event.delay(**event_kwargs)

    return cors_response(request, JsonResponse({"status": 1}))
예제 #11
0
    class TestInsightApi(TransactionBaseTest):
        """Basic checks of the ``/api/insight`` endpoints via the test client."""

        TESTS_API = True

        def test_get_insight_items(self):
            """Listing with a ``user`` parameter returns one of two stored items."""
            pageview_on_mac = {
                "events": [{"id": "$pageview"}],
                "properties": [{"key": "$browser", "value": "Mac OS X"}],
            }

            # One item created by self.user ...
            DashboardItem.objects.create(
                filters=Filter(data=pageview_on_mac).to_dict(),
                team=self.team,
                created_by=self.user,
            )
            # ... and one without any creator.
            DashboardItem.objects.create(
                filters=Filter(data=pageview_on_mac).to_dict(),
                team=self.team,
            )

            listing = self.client.get("/api/insight/", data={"user": "******"})
            response = listing.json()

            self.assertEqual(len(response["results"]), 1)

        def test_get_saved_insight_items(self):
            """Listing with ``saved`` and ``user`` returns one of three stored items."""
            pageview_on_mac = {
                "events": [{"id": "$pageview"}],
                "properties": [{"key": "$browser", "value": "Mac OS X"}],
            }

            # Saved, created by self.user.
            DashboardItem.objects.create(
                filters=Filter(data=pageview_on_mac).to_dict(),
                saved=True,
                team=self.team,
                created_by=self.user,
            )
            # Not saved, created by self.user.
            DashboardItem.objects.create(
                filters=Filter(data=pageview_on_mac).to_dict(),
                team=self.team,
                created_by=self.user,
            )
            # Not saved, no creator.
            DashboardItem.objects.create(
                filters=Filter(data=pageview_on_mac).to_dict(),
                team=self.team,
            )

            query = {"saved": "true", "user": "******"}
            response = self.client.get("/api/insight/", data=query).json()

            self.assertEqual(len(response["results"]), 1)

        def test_create_insight_items(self):
            """POSTing filters to ``/api/insight`` stores one dashboard item."""
            request_body = {
                "filters": {
                    "events": [{"id": "$pageview"}],
                    "properties": [{"key": "$browser", "value": "Mac OS X"}],
                },
            }
            # Make sure the endpoint works with and without the trailing slash
            self.client.post(
                "/api/insight",
                data=request_body,
                content_type="application/json",
            ).json()

            stored_items = DashboardItem.objects.all()
            self.assertEqual(len(stored_items), 1)
            self.assertListEqual(
                stored_items[0].filters["events"], [{"id": "$pageview"}]
            )

        # BASIC TESTING OF ENDPOINTS. /queries as in depth testing for each insight

        def test_insight_trends_basic(self):
            """Two ``$pageview`` events yield a trend entry with a count of 2."""
            with freeze_time("2012-01-14T03:21:34.000Z"):
                for person_id in ("1", "2"):
                    event_factory(
                        team=self.team, event="$pageview", distinct_id=person_id
                    )

            with freeze_time("2012-01-15T04:01:34.000Z"):
                events_param = json.dumps([{"id": "$pageview"}])
                response = self.client.get(
                    "/api/insight/trend/?events={}".format(events_param)
                ).json()

            first_series = response[0]
            self.assertEqual(first_series["count"], 2)
            self.assertEqual(first_series["action"]["name"], "$pageview")

        def test_insight_session_basic(self):
            """Session results honour the date range and paginate via ``offset``."""
            # (frozen time, [(event name, distinct id, extra factory kwargs)])
            seed_events = [
                ("2012-01-14T03:21:34.000Z", [
                    ("1st action", "1", {}),
                    ("1st action", "2", {}),
                ]),
                ("2012-01-14T03:25:34.000Z", [
                    ("2nd action", "1", {}),
                    ("2nd action", "2", {}),
                ]),
                ("2012-01-15T03:59:34.000Z", [("3rd action", "2", {})]),
                ("2012-01-15T03:59:35.000Z", [("3rd action", "1", {})]),
                ("2012-01-15T04:01:34.000Z", [
                    ("4th action", "1", {"properties": {"$os": "Mac OS X"}}),
                    ("4th action", "2", {"properties": {"$os": "Windows 95"}}),
                ]),
            ]
            for frozen_at, batch in seed_events:
                with freeze_time(frozen_at):
                    for event_name, person_id, extra in batch:
                        event_factory(
                            team=self.team,
                            event=event_name,
                            distinct_id=person_id,
                            **extra
                        )

            # No explicit range, evaluated at 2012-01-15T04:01:34Z.
            with freeze_time("2012-01-15T04:01:34.000Z"):
                response = self.client.get("/api/insight/session/").json()
            self.assertEqual(len(response["result"]), 2)

            response = self.client.get(
                "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-15"
            ).json()
            self.assertEqual(len(response["result"]), 4)

            # 46 more events, one hour apart, each with its own distinct id:
            # 50 results in total and no ``offset`` in the response.
            for i in range(46):
                frozen_at = relative_date_parse(
                    "2012-01-15T04:01:34.000Z"
                ) + relativedelta(hours=i)
                with freeze_time(frozen_at):
                    event_factory(
                        team=self.team,
                        event="action {}".format(i),
                        distinct_id=str(i + 3),
                    )

            three_day_url = (
                "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-17"
            )
            response = self.client.get(three_day_url).json()
            self.assertEqual(len(response["result"]), 50)
            self.assertIsNone(response.get("offset"))

            # Two more push the total past 50, so an offset of 50 is reported.
            for i in range(2):
                frozen_at = relative_date_parse(
                    "2012-01-15T04:01:34.000Z"
                ) + relativedelta(hours=i + 46)
                with freeze_time(frozen_at):
                    event_factory(
                        team=self.team,
                        event="action {}".format(i),
                        distinct_id=str(i + 49),
                    )

            response = self.client.get(three_day_url).json()
            self.assertEqual(len(response["result"]), 50)
            self.assertEqual(response["offset"], 50)

            # The page at offset 50 holds the remaining two and has no offset.
            response = self.client.get(
                "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-17&offset=50"
            ).json()
            self.assertEqual(len(response["result"]), 2)
            self.assertIsNone(response.get("offset"))

        # TODO: remove this check
        if not check_ee_enabled():

            @override_settings(CELERY_TASK_ALWAYS_EAGER=True)
            def test_insight_funnels_basic(self):
                """The funnel endpoint reports ``loading`` as true."""
                event_factory(
                    team=self.team, event="user signed up", distinct_id="1"
                )
                funnel_steps = json.dumps([
                    {"id": "user signed up", "type": "events", "order": 0},
                ])
                response = self.client.get(
                    "/api/insight/funnel/?events={}".format(funnel_steps)
                ).json()
                self.assertEqual(response["loading"], True)

            # TODO: remove this check
            def test_insight_retention_basic(self):
                """Retention returns 11 rows; the first value of the first row counts 1."""
                person_factory(
                    team=self.team,
                    distinct_ids=["person1"],
                    properties={"email": "*****@*****.**"},
                )
                # Two pageviews by the same person, 11 and 10 days ago.
                for days_ago in (11, 10):
                    event_factory(
                        team=self.team,
                        event="$pageview",
                        distinct_id="person1",
                        timestamp=timezone.now() - timedelta(days=days_ago),
                    )

                response = self.client.get("/api/insight/retention/").json()

                self.assertEqual(len(response["data"]), 11)
                self.assertEqual(response["data"][0]["values"][0]["count"], 1)

        def test_insight_paths_basic(self):
            """Two pageviews by one person yield a single path entry."""
            person_factory(team=self.team, distinct_ids=["person_1"])
            for visited_url in ("/", "/about"):
                event_factory(
                    properties={"$current_url": visited_url},
                    distinct_id="person_1",
                    event="$pageview",
                    team=self.team,
                )

            response = self.client.get("/api/insight/path").json()
            self.assertEqual(len(response), 1)
예제 #12
0
 def _match_distinct_id(self, distinct_id: str) -> bool:
     """Look up ``distinct_id`` in ClickHouse when EE is enabled, else in Postgres."""
     lookup = (self._query_clickhouse
               if check_ee_enabled() else self._query_postgres)
     return lookup(distinct_id)
예제 #13
0
def get_event(request):
    """Validate a capture request and hand each contained event to processing.

    The payload may be a single event (dict), a list of events, or a dict
    with a non-empty ``batch`` list. The token is taken from the payload or
    request first, with a personal API key as the fallback, and is resolved
    to a team. Each event must yield a distinct id and contain an ``event``
    key.

    With EE enabled, ``process_event_ee`` is called directly and, when
    ``settings.LOG_TO_WAL`` is set, the event is also passed to
    ``log_event``. Otherwise a Celery task is sent by name; teams with
    ``plugins_opt_in`` use the ``_with_plugins`` task on the plugins queue.

    Events are handled in order. If a later event in a batch fails
    validation, the events before it have already been dispatched when the
    400 response is returned.

    Returns:
        A CORS-wrapped ``JsonResponse``: ``{"status": 1}`` on success, or a
        400 with ``{"code": "validation", "message": ...}`` (plus ``item``
        for per-event failures).
    """
    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX, ))
    timer.start()
    # NOTE(review): the early validation returns below never call
    # ``timer.stop``, so rejected requests are not timed. Confirm intended.

    def _reject(message, **extra):
        # Single place that builds the 400 validation response. Key order is
        # code, message, then any extras (currently only ``item``).
        body = {"code": "validation", "message": message}
        body.update(extra)
        return cors_response(request, JsonResponse(body, status=400))

    now = timezone.now()
    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return _reject(
            "Malformed request data. Make sure you're sending valid JSON.")
    if not data:
        return _reject(
            "No data found. Make sure to use a POST request when sending the payload in the body of the request."
        )
    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    is_personal_api_key = False
    if not token:
        # Fall back to a personal API key when no project token was sent.
        token = PersonalAPIKeyAuthentication.find_key(
            request, data_from_request["body"],
            data if isinstance(data, dict) else None)
        is_personal_api_key = True
    if not token:
        return _reject(
            "Neither api_key nor personal_api_key set. You can find your project API key in PostHog project settings."
        )

    team = Team.objects.get_team_from_token(token, is_personal_api_key)
    if team is None:
        return _reject(
            "Project or personal API key invalid. You can find your project API key in PostHog project settings."
        )

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            # Cannot fail (the value was just checked to be truthy);
            # presumably kept to narrow the type for static checkers.
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    events = data if isinstance(data, list) else [data]

    # These depend only on the request, so compute them once instead of
    # several times per event.
    ip = get_ip_address(request)
    site_url = request.build_absolute_uri("/")[:-1]
    ee_enabled = check_ee_enabled()

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return _reject(
                "You need to set user distinct ID field `distinct_id`.",
                item=event)
        if "event" not in event:
            return _reject("You need to set event name field `event`.",
                           item=event)

        if ee_enabled:
            event_kwargs = dict(
                distinct_id=distinct_id,
                ip=ip,
                site_url=site_url,
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
            )
            process_event_ee(**event_kwargs)
            if settings.LOG_TO_WAL:
                # log the event to kafka write ahead log for processing
                log_event(**event_kwargs)
        else:
            task_name = "posthog.tasks.process_event.process_event"
            celery_queue = settings.CELERY_DEFAULT_QUEUE
            if team.plugins_opt_in:
                task_name += "_with_plugins"
                celery_queue = settings.PLUGINS_CELERY_QUEUE

            # Positional args; ``now`` is sent as an ISO-8601 string here.
            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[
                    distinct_id,
                    ip,
                    site_url,
                    event,
                    team.id,
                    now.isoformat(),
                    sent_at,
                ],
            )

    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))