Example #1
0
    def test_status_report_multiple_ids_per_person(self) -> None:
        """Check the `multiple_ids_per_person` section of the team's status report.

        Two persons are set up: one with five distinct IDs and one with three.
        """
        first_person = str(UUIDT())
        second_person = str(UUIDT())

        # Distinct IDs are registered in the same order as they are listed here.
        ids_by_person = (
            (first_person, ("id1", "id2", "id3", "id4", "id5")),
            (second_person, ("id6", "id7", "id8")),
        )
        for person_id, distinct_ids in ids_by_person:
            for distinct_id in distinct_ids:
                create_person_distinct_id(self.team.id, distinct_id, person_id)

        team_report = status_report(dry_run=True).get("teams")[self.team.id]  # type: ignore

        self.assertEqual(
            team_report["multiple_ids_per_person"],
            {
                "total_persons_with_more_than_2_ids": 2,
                "max_distinct_ids_for_one_person": 5,
            },
        )
Example #2
0
def log_event_to_dead_letter_queue(
    raw_payload: Dict,
    event_name: str,
    event: Dict,
    error_message: str,
    error_location: str,
    topic: str = KAFKA_DEAD_LETTER_QUEUE,
):
    """Produce a copy of a failed event, annotated with error details, to the dead letter queue.

    Args:
        raw_payload: Original payload; stored JSON-encoded under ``raw_payload``.
        event_name: Value stored under ``event``.
        event: Parsed event dict. It is copied, not mutated. It must contain ``uuid``
            (moved to ``event_uuid``); ``now`` is optional.
        error_message: Stored under ``error``.
        error_location: Stored under ``error_location``.
        topic: Kafka topic to produce to.

    Raises:
        KeyError: If ``event`` has no ``uuid`` key.

    Failures while producing are captured and counted, never re-raised.
    """
    data = event.copy()

    data["error_timestamp"] = datetime.now().isoformat()
    data["error_location"] = error_location
    data["error"] = error_message
    data["elements_chain"] = ""
    data["id"] = str(UUIDT())
    data["event"] = event_name
    data["raw_payload"] = json.dumps(raw_payload)

    # Use .get(): an event without a "now" key must still reach the dead letter queue
    # instead of raising KeyError here and being lost altogether.
    event_now = data.get("now")
    # Timezone info is dropped before re-serializing the timestamp.
    data["now"] = datetime.fromisoformat(event_now).replace(tzinfo=None).isoformat() if event_now else None
    data["tags"] = ["django_server"]
    data["event_uuid"] = event["uuid"]
    del data["uuid"]

    try:
        KafkaProducer().produce(topic=topic, data=data)
        statsd.incr(settings.EVENTS_DEAD_LETTER_QUEUE_STATSD_METRIC)
    except Exception as e:
        # Best effort: report the failure but never propagate it to the caller.
        capture_exception(e)
        statsd.incr("events_dead_letter_queue_produce_error")

        if settings.DEBUG:
            print("Failed to produce to events dead letter queue with error:", e)
Example #3
0
def attempt_migration_rollback(migration_instance: AsyncMigration):
    """
    Walk the migration's operations backwards, starting from the current one, and run
    each operation in rollback mode.

    The first failing rollback is reported through process_error and ends the function
    without marking the migration as rolled back. When every rollback succeeds the
    migration is updated to status RolledBack with progress 0.
    """
    migration_instance.refresh_from_db()
    operations = get_async_migration_definition(migration_instance.name).operations

    # A completed migration has its index set one past the last operation, so clamp it
    # to the final valid index; otherwise the rollback starts at the current operation.
    last_index = len(operations) - 1
    start_index = min(migration_instance.current_operation_index, last_index)

    for op_index in reversed(range(start_index + 1)):
        try:
            operation = operations[op_index]
            execute_op(operation, str(UUIDT()), rollback=True)
        except Exception as e:
            message = f"At operation {op_index} rollback failed with error:{str(e)}"
            process_error(
                migration_instance=migration_instance,
                error=message,
                rollback=False,
                alert=True,
                current_operation_index=op_index,
            )
            return

    update_async_migration(
        migration_instance=migration_instance,
        status=MigrationStatus.RolledBack,
        progress=0,
    )
Example #4
0
def capture_internal(event, distinct_id, ip, site_url, now, sent_at, team_id):
    """Route an internally captured event to log_event or to the plugins Celery task.

    When ClickHouse is enabled the event goes to log_event together with a newly
    generated UUID; otherwise it is sent to the plugin-processing Celery task.
    """
    # Generated up front in both cases, although only the log_event path uses it.
    event_uuid = UUIDT()

    if not is_clickhouse_enabled():
        celery_app.send_task(
            name="posthog.tasks.process_event.process_event_with_plugins",
            queue=settings.PLUGINS_CELERY_QUEUE,
            args=[distinct_id, ip, site_url, event, team_id, now.isoformat(), sent_at],
        )
        return

    log_event(
        distinct_id=distinct_id,
        ip=ip,
        site_url=site_url,
        data=event,
        team_id=team_id,
        now=now,
        sent_at=sent_at,
        event_uuid=event_uuid,
    )
Example #5
0
        def test_simple_log_is_fetched(self):
            """A single stored log entry comes back from fetch_plugin_log_entries."""
            instance_id = str(UUIDT())

            plugin: Plugin = Plugin.objects.create(organization=self.organization)
            plugin_config: PluginConfig = PluginConfig.objects.create(plugin=plugin, order=1)

            plugin_log_entry_factory(
                team_id=self.team.pk,
                plugin_id=plugin.pk,
                plugin_config_id=plugin_config.pk,
                source=PluginLogEntry.Source.CONSOLE,
                type=PluginLogEntry.Type.INFO,
                message="Something happened!",
                instance_id=instance_id,
            )

            # Query window: from the minimum datetime up to five seconds from now.
            window_start = timezone.datetime.min
            window_end = timezone.now() + timezone.timedelta(seconds=5)
            entries = fetch_plugin_log_entries(
                plugin_config_id=plugin_config.pk,
                after=window_start,
                before=window_end,
            )

            self.assertEqual(len(entries), 1)
            self.assertEqual(entries[0].message, "Something happened!")
Example #6
0
        def test_log_limit_works(self):
            """With limit=1 only one of the two stored entries is returned."""
            instance_id = str(UUIDT())

            plugin: Plugin = Plugin.objects.create(organization=self.organization)
            plugin_config: PluginConfig = PluginConfig.objects.create(plugin=plugin, order=1)

            # Fields common to both log entries.
            shared_fields = dict(
                team_id=self.team.pk,
                plugin_id=plugin.pk,
                plugin_config_id=plugin_config.pk,
                source=PluginLogEntry.Source.CONSOLE,
                instance_id=instance_id,
            )
            plugin_log_entry_factory(
                type=PluginLogEntry.Type.INFO, message="Something happened!", **shared_fields
            )
            plugin_log_entry_factory(
                type=PluginLogEntry.Type.ERROR, message="Random error", **shared_fields
            )

            entries = fetch_plugin_log_entries(plugin_config_id=plugin_config.pk, limit=1)

            # The entry created last is the one expected back.
            self.assertEqual(len(entries), 1)
            self.assertEqual(entries[0].message, "Random error")
Example #7
0
def emit_omni_person(
    event_uuid: UUID,
    team_id: int,
    distinct_id: str,
    uuid: Optional[UUID] = None,
    properties: Optional[Dict] = None,
    sync: bool = False,
    is_identified: bool = False,
    timestamp: Optional[datetime.datetime] = None,
) -> UUID:
    """Produce an omni-person record to the KAFKA_OMNI_PERSON topic.

    Args:
        event_uuid: UUID of the related event; sent as a string.
        team_id: Team the person belongs to.
        distinct_id: Distinct ID of the person.
        uuid: Person UUID; a new UUIDT is generated when omitted or falsy.
        properties: Person properties, JSON-encoded into the record. ``None``
            is serialized as an empty object.
        sync: Accepted for interface compatibility; not used by this function.
        is_identified: Sent as 0/1.
        timestamp: Record timestamp; defaults to ``now()``.

    Returns:
        The person UUID that was sent (given or generated).
    """
    if not uuid:
        uuid = UUIDT()

    if not timestamp:
        timestamp = now()

    # The default used to be a shared mutable `{}`; fall back to a fresh dict instead.
    person_properties = {} if properties is None else properties

    data = {
        "event_uuid": str(event_uuid),
        "uuid": str(uuid),
        "distinct_id": distinct_id,
        "team_id": team_id,
        "properties": json.dumps(person_properties),
        "is_identified": int(is_identified),
        "ts": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
    }
    p = KafkaProducer()
    p.produce(topic=KAFKA_OMNI_PERSON, data=data)
    return uuid
Example #8
0
    def _generate_psql_data(self, team, n_events, n_days):
        """Create `n_events` demo persons and bulk-insert one `$purchase` event for each.

        Every event gets a timestamp a random number of days (0..n_days) before now.
        """
        distinct_ids = []
        for _ in range(n_events):
            new_distinct_id = str(UUIDT())
            distinct_ids.append(new_distinct_id)
            Person.objects.create(
                team=team,
                distinct_ids=[new_distinct_id],
                properties={"is_demo": True},
            )

        def build_purchase(distinct_id):
            # NOTE(review): the tier lookup runs once per field, as before; presumably
            # _deterministic_random_value is pure, so a single call would do. Confirm.
            plan_name = PRICING_TIERS[_deterministic_random_value(distinct_id)][0]
            plan_value = PRICING_TIERS[_deterministic_random_value(distinct_id)][1]
            return Event(
                event="$purchase",
                team=team,
                distinct_id=distinct_id,
                properties={"plan": plan_name, "purchase_value": plan_value},
                timestamp=now() - relativedelta(days=random.randint(0, n_days)),
            )

        Event.objects.bulk_create(build_purchase(distinct_id) for distinct_id in distinct_ids)
Example #9
0
    def create_people(self):
        """Bulk-create people and their distinct IDs via the ORM, then replay both
        through the ClickHouse `create_person` / `create_person_distinct_id` helpers."""
        self.people = [self.make_person(index) for index in range(self.n_people)]
        self.distinct_ids = [str(UUIDT()) for _ in self.people]
        self.people = Person.objects.bulk_create(self.people)

        # One PersonDistinctId per (person, distinct_id) pair.
        person_distinct_ids = []
        for person, distinct_id in zip(self.people, self.distinct_ids):
            person_distinct_ids.append(
                PersonDistinctId(team=self.team, person=person, distinct_id=distinct_id)
            )
        PersonDistinctId.objects.bulk_create(person_distinct_ids)

        from ee.clickhouse.models.person import create_person, create_person_distinct_id

        for person in self.people:
            create_person(
                uuid=str(person.uuid),
                team_id=person.team.pk,
                properties=person.properties,
                is_identified=person.is_identified,
            )

        # The person's UUID (as a string) is what gets passed as the person id.
        for entry in person_distinct_ids:
            create_person_distinct_id(entry.team.pk, entry.distinct_id, str(entry.person.uuid))
Example #10
0
def create_person(
    team_id: int,
    uuid: Optional[str] = None,
    properties: Optional[Dict] = None,
    sync: bool = False,
    is_identified: bool = False,
    timestamp: Optional[datetime.datetime] = None,
) -> str:
    """Produce a person row to KAFKA_PERSON using INSERT_PERSON_SQL.

    Args:
        team_id: Team the person belongs to.
        uuid: Person UUID; stringified when given, generated via UUIDT when omitted or falsy.
        properties: Person properties, JSON-encoded into the row. ``None`` is
            serialized as an empty object.
        sync: Forwarded to the producer's ``produce`` call.
        is_identified: Stored as 0/1.
        timestamp: Row timestamp (second precision); defaults to ``now()``.

    Returns:
        The person UUID as a string.
    """
    uuid = str(uuid) if uuid else str(UUIDT())
    if not timestamp:
        timestamp = now()

    # The default used to be a shared mutable `{}`; fall back to a fresh dict instead.
    person_properties = {} if properties is None else properties

    data = {
        "id": uuid,
        "team_id": team_id,
        "properties": json.dumps(person_properties),
        "is_identified": int(is_identified),
        "timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S"),
    }
    p = ClickhouseProducer()
    p.produce(topic=KAFKA_PERSON, sql=INSERT_PERSON_SQL, data=data, sync=sync)
    return uuid
Example #11
0
def create_element(
    element: Element,
    team: Team,
    event_uuid: UUID,
    elements_hash: str,
    timestamp: Optional[datetime.datetime] = None,
) -> None:
    """Produce one element row to KAFKA_ELEMENTS using INSERT_ELEMENTS_SQL.

    Falsy element attributes are replaced with an empty value of the matching
    kind ("" / [] / 0 / {}). `timestamp` defaults to `now()`.
    """
    created_at = timestamp or now()

    row = {
        "uuid": str(UUIDT()),
        "event_uuid": str(event_uuid),
        "created_at": created_at.strftime("%Y-%m-%d %H:%M:%S.%f"),
        "text": element.text or "",
        "tag_name": element.tag_name or "",
        "href": element.href or "",
        "attr_id": element.attr_id or "",
        "attr_class": element.attr_class or [],
        "nth_child": element.nth_child or 0,
        "nth_of_type": element.nth_of_type or 0,
        "attributes": json.dumps(element.attributes or {}),
        "order": element.order or 0,
        "team_id": team.pk,
        "elements_hash": elements_hash,
    }

    ClickhouseProducer().produce(topic=KAFKA_ELEMENTS, sql=INSERT_ELEMENTS_SQL, data=row)
    def test_old_logs_are_deleted_while_newer_ones_kept(self) -> None:
        """Of five entries aged 0, 2, 6, 9 and 31 days, three are expected to remain
        after delete_old_plugin_logs() runs."""
        instance_id = str(UUIDT())
        reference_time = timezone.now()

        plugin: Plugin = Plugin.objects.create(organization=self.organization)
        plugin_config: PluginConfig = PluginConfig.objects.create(plugin=plugin, order=1)

        entry_ages_in_days = (0, 2, 6, 9, 31)
        for age in entry_ages_in_days:
            PluginLogEntry.objects.create(
                team_id=self.team.pk,
                plugin_id=plugin.pk,
                plugin_config_id=plugin_config.pk,
                type=PluginLogEntry.Type.INFO,
                message="Test",
                instance_id=instance_id,
                timestamp=reference_time - timezone.timedelta(days=age),
            )

        self.assertEqual(PluginLogEntry.objects.count(), 5)

        delete_old_plugin_logs()

        self.assertEqual(PluginLogEntry.objects.count(), 3)
Example #13
0
def run_query(fn, *args):
    """Run `fn(*args)` tagged with a fresh query id and return the ClickHouse stats for that id.

    The client's request information is reset to None afterwards, even if `fn` raises.
    """
    query_id = str(UUIDT())
    # NOTE(review): the literal `$` before the function name ends up in the id
    # ("<uuid>::$<name>"); it looks like a template-literal leftover. Confirm it is intended.
    request_info = {"kind": "benchmark", "id": f"{query_id}::${fn.__name__}"}
    client._request_information = request_info
    try:
        fn(*args)
        stats = get_clickhouse_query_stats(query_id)
    finally:
        client._request_information = None
    return stats
Example #14
0
    def test_create_cache(self) -> None:
        """Creating the same four elements twice with use_cache=True leaves four stored elements."""

        def sample_elements():
            # Fresh Element instances for every call.
            return [
                Element(tag_name="a", href="/a-url", nth_child=1, nth_of_type=0),
                Element(tag_name="button", nth_child=0, nth_of_type=0),
                Element(tag_name="div", nth_child=0, nth_of_type=0),
                Element(tag_name="div", nth_child=0, nth_of_type=0, attr_id="nested"),
            ]

        self.assertEqual(len(get_all_elements()), 0)

        # The second round uses a new event UUID but identical elements; the count must not grow.
        for _ in range(2):
            create_elements(
                event_uuid=UUIDT(),
                team=self.team,
                elements=sample_elements(),
                use_cache=True,
            )
            self.assertEqual(len(get_all_elements()), 4)
Example #15
0
    def process_event_ee(
        distinct_id: str,
        ip: str,
        site_url: str,
        data: dict,
        team_id: int,
        now: datetime.datetime,
        sent_at: Optional[datetime.datetime],
    ) -> None:
        """Process one event: record a `$snapshot` as a session recording event,
        capture anything else via _capture_ee.

        Args:
            distinct_id: Distinct ID the event was sent with.
            ip: Forwarded to _capture_ee.
            site_url: Forwarded to _capture_ee.
            data: Event payload; must contain "event". Its "properties" dict (if any)
                is updated in place with "$set" when the payload carries one.
            team_id: Team the event belongs to.
            now: Passed to handle_timestamp.
            sent_at: Passed to handle_timestamp.

        The "process_event_ee" timer is stopped on both the snapshot and the regular path.
        """
        timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX, ))
        timer.start()
        properties = data.get("properties", {})
        if data.get("$set"):
            properties["$set"] = data["$set"]

        person_uuid = UUIDT()
        event_uuid = UUIDT()
        ts = handle_timestamp(data, now, sent_at)
        handle_identify_or_alias(data["event"], properties, distinct_id,
                                 team_id)

        if data["event"] == "$snapshot":
            create_session_recording_event(
                uuid=event_uuid,
                team_id=team_id,
                distinct_id=distinct_id,
                session_id=properties["$session_id"],
                snapshot_data=properties["$snapshot_data"],
                timestamp=ts,
            )
            # Previously this early return left the started timer running forever,
            # so snapshot events never reported a timing.
            timer.stop("process_event_ee")
            return

        _capture_ee(
            event_uuid=event_uuid,
            person_uuid=person_uuid,
            ip=ip,
            site_url=site_url,
            team_id=team_id,
            event=data["event"],
            distinct_id=distinct_id,
            properties=properties,
            timestamp=ts,
        )
        timer.stop("process_event_ee")
Example #16
0
    def process_event_ee(
        distinct_id: str, ip: str, site_url: str, data: dict, team_id: int, now: str, sent_at: Optional[str],
    ) -> None:
        """Capture a single event via _capture_ee with freshly generated person and event UUIDs."""
        # When the payload has no "properties" key, fall back to its "$set" value (or an empty dict).
        fallback_properties = data.get("$set", {})
        properties = data.get("properties", fallback_properties)

        person_uuid = UUIDT()
        event_uuid = UUIDT()
        event_timestamp = handle_timestamp(data, now, sent_at)

        _capture_ee(
            event_uuid=event_uuid,
            person_uuid=person_uuid,
            ip=ip,
            site_url=site_url,
            team_id=team_id,
            event=data["event"],
            distinct_id=distinct_id,
            properties=properties,
            timestamp=event_timestamp,
        )
Example #17
0
def _create_person(**kwargs) -> Person:
    """Create a person through create_person, register its distinct IDs, and return a Person.

    Keyword Args:
        uuid: Optional person UUID; a new UUIDT is used when missing or falsy.
        distinct_ids: Required iterable of distinct IDs to attach to the person.
        team_id: Required; also forwarded to create_person with the remaining kwargs.

    Returns:
        A Person whose `id` and `uuid` are both set to create_person's return value.

    Raises:
        KeyError: If `distinct_ids` or `team_id` is not supplied.
    """
    # Always pop "uuid": a falsy value (e.g. uuid=None) used to stay in kwargs and was then
    # passed a second time below, raising "got multiple values for keyword argument 'uuid'".
    requested_uuid = kwargs.pop("uuid", None)
    uuid = str(requested_uuid) if requested_uuid else str(UUIDT())
    distinct_ids = kwargs.pop("distinct_ids")
    person = create_person(uuid=uuid, **kwargs)
    for distinct_id in distinct_ids:  # renamed from `id`, which shadowed the builtin
        create_person_distinct_id(0, kwargs["team_id"], distinct_id, str(person))
    return Person(id=person, uuid=person)
Example #18
0
    def create_people(self):
        """Build `n_people` persons with one generated distinct ID each and bulk-insert both."""
        self.people = [self.make_person(index) for index in range(self.n_people)]
        self.distinct_ids = [str(UUIDT()) for _ in self.people]

        Person.objects.bulk_create(self.people)

        # Pair every person with the distinct ID at the same position.
        person_distinct_ids = []
        for person, distinct_id in zip(self.people, self.distinct_ids):
            person_distinct_ids.append(
                PersonDistinctId(team=self.team, person=person, distinct_id=distinct_id)
            )
        PersonDistinctId.objects.bulk_create(person_distinct_ids)
Example #19
0
def run_async_migration_next_op(
        migration_name: str,
        migration_instance: Optional[AsyncMigration] = None):
    """
    Execute the next pending operation of the named async migration.

    Terminology:
    - migration_instance: The migration object as stored in the DB
    - migration_definition: The actual migration class outlining the operations (e.g. async_migrations/examples/example.py)

    Returns a 2-tuple:
    - (False, False) when no running migration with that name exists, or the operation raised
    - (False, True) when every operation has already run and the migration was completed
    - (True, False) when the operation ran and the operation index was advanced
    """
    if migration_instance:
        migration_instance.refresh_from_db()
    else:
        try:
            migration_instance = AsyncMigration.objects.get(
                name=migration_name, status=MigrationStatus.Running)
        except AsyncMigration.DoesNotExist:
            return (False, False)

    assert migration_instance is not None

    operations = get_async_migration_definition(migration_name).operations

    # Index past the last operation: nothing left to run, finish the migration.
    if migration_instance.current_operation_index >= len(operations):
        complete_migration(migration_instance)
        return (False, True)

    query_id = str(UUIDT())

    try:
        op = operations[migration_instance.current_operation_index]
        execute_op(op, query_id)
        update_async_migration(
            migration_instance=migration_instance,
            current_query_id=query_id,
            current_operation_index=migration_instance.current_operation_index + 1,
        )
    except Exception as e:
        error = f"Exception was thrown while running operation {migration_instance.current_operation_index} : {str(e)}"
        process_error(migration_instance, error, alert=True)
        return (False, False)

    update_migration_progress(migration_instance)
    return (True, False)
Example #20
0
    def process_event_ee(
        distinct_id: str,
        ip: str,
        site_url: str,
        data: dict,
        team_id: int,
        now: str,
        sent_at: Optional[str],
    ) -> None:
        """Process one event: store a `$snapshot` as a session recording event,
        capture every other event via _capture_ee."""
        properties = data.get("properties", {})
        set_payload = data.get("$set")
        if set_payload:
            properties["$set"] = set_payload

        person_uuid = UUIDT()
        event_uuid = UUIDT()
        event_timestamp = handle_timestamp(data, now, sent_at)

        event_name = data["event"]
        handle_identify_or_alias(event_name, properties, distinct_id, team_id)

        # Regular events are captured; only snapshots take the recording path below.
        if event_name != "$snapshot":
            _capture_ee(
                event_uuid=event_uuid,
                person_uuid=person_uuid,
                ip=ip,
                site_url=site_url,
                team_id=team_id,
                event=event_name,
                distinct_id=distinct_id,
                properties=properties,
                timestamp=event_timestamp,
            )
            return

        create_session_recording_event(
            uuid=event_uuid,
            team_id=team_id,
            distinct_id=distinct_id,
            session_id=properties["$session_id"],
            snapshot_data=properties["$snapshot_data"],
            timestamp=event_timestamp,
        )
Example #21
0
    def test_can_not_save_if_there_is_neither_a_team_id_nor_an_organisation_id(self):
        """Creating an ActivityLog with neither team nor organization id must raise IntegrityError."""
        # Rows that carry a team id or an organization id can already exist...
        ActivityLog.objects.create(team_id=3)
        ActivityLog.objects.create(organization_id=UUIDT())

        # ...but a row with neither of them has to be rejected.
        with self.assertRaises(IntegrityError) as raised:
            ActivityLog.objects.create()

        expected_fragment = (
            'new row for relation "posthog_activitylog" violates check constraint "must_have_team_or_organization_id'
        )
        self.assertIn(expected_fragment, raised.exception.args[0])
Example #22
0
    def test_status_report_duplicate_distinct_ids(self) -> None:
        """Check the `duplicate_distinct_ids` section of the team's status report."""
        # "duplicate_id1" is registered twice and "duplicate_id2" three times,
        # each time for a different person.
        repeated_ids = ("duplicate_id1",) * 2 + ("duplicate_id2",) * 3
        for distinct_id in repeated_ids:
            create_person_distinct_id(self.team.id, distinct_id, str(UUIDT()))

        # Two rows for "duplicate_id_old", inserted directly with 2020 timestamps.
        for index in range(2):
            sync_execute(
                "INSERT INTO person_distinct_id SELECT %(distinct_id)s, %(person_id)s, %(team_id)s, 1, %(timestamp)s, 0 VALUES",
                {
                    "distinct_id": "duplicate_id_old",
                    "person_id": str(UUIDT()),
                    "team_id": self.team.id,
                    "timestamp": "2020-01-01 12:01:0%s" % index,
                },
            )

        team_report = status_report(dry_run=True).get("teams")[self.team.id]  # type: ignore

        self.assertEqual(
            team_report["duplicate_distinct_ids"],
            {
                "prev_total_ids_with_duplicates": 1,
                "prev_total_extra_distinct_id_rows": 1,
                "new_total_ids_with_duplicates": 2,
                "new_total_extra_distinct_id_rows": 4,
            },
        )
Example #23
0
    def populate_session_recording(self, person: Person, distinct_id: str,
                                   index: int):
        """Queue the demo recording's snapshots for the first person only (index 0).

        Each snapshot keeps its offset from the recording's first snapshot, re-based onto
        the current time, and all of them share one session id and one window id.
        """
        if index != 0:
            return

        base_time = now()
        recorded_snapshots = self.demo_recording["result"]["snapshots"]
        first_timestamp = recorded_snapshots[0]["timestamp"]
        session_id = str(UUIDT())
        window_id = str(UUIDT())

        self.snapshots.extend(
            {
                "session_id": session_id,
                "window_id": window_id,
                "distinct_id": distinct_id,
                # Snapshot timestamps are treated as milliseconds here.
                "timestamp": base_time + timedelta(milliseconds=snapshot["timestamp"] - first_timestamp),
                "snapshot_data": snapshot,
            }
            for snapshot in recorded_snapshots
        )
Example #24
0
    def _generate_ch_data(self, team, n_events, n_days):
        """Create `n_events` demo persons and one `$purchase` event per person via create_event.

        Each event uses a randomly chosen pricing tier and a timestamp a random number
        of days (0..n_days) before now.
        """
        distinct_ids = []
        for _ in range(n_events):
            new_distinct_id = str(UUIDT())
            distinct_ids.append(new_distinct_id)
            Person.objects.create(team=team, distinct_ids=[new_distinct_id], properties={"is_demo": True})

        for distinct_id in distinct_ids:
            event_uuid = uuid4()
            tier = random.choice(PRICING_TIERS)
            purchase_properties = {"plan": tier[0], "purchase_value": tier[1]}
            create_event(
                event="$purchase",
                team=team,
                distinct_id=distinct_id,
                properties=purchase_properties,
                timestamp=now() - relativedelta(days=random.randint(0, n_days)),
                event_uuid=event_uuid,
            )
Example #25
0
def plugin_log_factory_ch(*, team_id: int, plugin_id: int,
                          plugin_config_id: int, source: PluginLogEntry.Source,
                          type: PluginLogEntry.Type, message: str,
                          instance_id: str):
    """Insert one plugin log entry with INSERT_PLUGIN_LOG_ENTRY_SQL.

    The entry gets a newly generated UUIDT as id and the current time as timestamp.
    All arguments are keyword-only.
    """
    entry_id = UUIDT()
    logged_at = timezone.now().strftime("%Y-%m-%dT%H:%M:%S.%f")

    row = {
        "id": entry_id,
        "team_id": team_id,
        "plugin_id": plugin_id,
        "plugin_config_id": plugin_config_id,
        "source": source,
        "type": type,
        "instance_id": instance_id,
        "message": message,
        "timestamp": logged_at,
    }
    sync_execute(INSERT_PLUGIN_LOG_ENTRY_SQL, row)
Example #26
0
def _process_event_ee(
    distinct_id: str,
    ip: str,
    site_url: str,
    data: dict,
    team_id: int,
    now: str,
    sent_at: Optional[str],
) -> None:
    """Parse the ISO-formatted `now` / `sent_at` strings and delegate to process_event_ee
    with a newly generated event UUID."""
    parsed_now = parser.isoparse(now)
    # sent_at may be missing or empty, in which case None is passed on.
    parsed_sent_at = parser.isoparse(sent_at) if sent_at else None

    return process_event_ee(
        distinct_id=distinct_id,
        ip=ip,
        site_url=site_url,
        data=data,
        team_id=team_id,
        now=parsed_now,
        sent_at=parsed_sent_at,
        event_uuid=UUIDT(),
    )
Example #27
0
def capture_internal(event,
                     distinct_id,
                     ip,
                     site_url,
                     now,
                     sent_at,
                     team_id,
                     event_uuid=None) -> None:
    """Parse an event into its Kafka representation and hand it to log_event.

    Args:
        event: Raw event dict; must contain "event" (the event name).
        distinct_id: Distinct ID the event was sent with.
        ip: Forwarded to parse_kafka_event_data.
        site_url: Forwarded to parse_kafka_event_data.
        now: Forwarded to parse_kafka_event_data.
        sent_at: Forwarded to parse_kafka_event_data.
        team_id: Team the event belongs to.
        event_uuid: UUID for the event; a new UUIDT is generated per call when omitted.

    The partition key is the SHA-256 hex digest of "<team_id>:<distinct_id>".
    """
    # The default used to be `UUIDT()` in the signature, which Python evaluates only once
    # at definition time, so every call without an explicit UUID shared the same value.
    if event_uuid is None:
        event_uuid = UUIDT()

    parsed_event = parse_kafka_event_data(
        distinct_id=distinct_id,
        ip=ip,
        site_url=site_url,
        data=event,
        team_id=team_id,
        now=now,
        sent_at=sent_at,
        event_uuid=event_uuid,
    )
    partition_key = hashlib.sha256(
        f"{team_id}:{distinct_id}".encode()).hexdigest()
    log_event(parsed_event, event["event"], partition_key=partition_key)
Example #28
0
    def test_does_not_throw_if_cannot_log_activity(self):
        """log_activity is expected not to raise when the write fails, and to log a warning instead."""
        with self.assertLogs(level="WARN") as captured:
            try:
                log_activity(
                    organization_id=UUIDT(),
                    team_id=1,
                    # an unsaved user makes the write raise, so nothing has to be mocked
                    user=User(first_name="testy", email="*****@*****.**"),
                    item_id="12345",
                    scope="testing throwing exceptions on create",
                    activity="does not explode",
                    detail=Detail(),
                )
            except Exception as e:
                pytest.fail(f"Should not have raised exception: {e}")

            record = captured.records[0].__dict__
            self.assertEqual(record["levelname"], "WARNING")

            message = record["msg"]
            expected_fields = {
                "event": "failed to write activity log",
                "scope": "testing throwing exceptions on create",
                "team": 1,
                "activity": "does not explode",
            }
            for field, expected_value in expected_fields.items():
                self.assertEqual(message[field], expected_value)
            self.assertIsInstance(message["exception"], ValueError)
Example #29
0
def get_event(request):
    """Validate an incoming capture request and dispatch each contained event.

    Steps:
    1. Load the request data; reply 400 when it is malformed or empty.
    2. Resolve the team from the token, or from a personal API key plus project ID;
       reply 400/401 when that fails.
    3. Normalize the payload into a list of events (single event, `batch`, or an
       `engage` request that gets the `$identify` event name).
    4. For every event, check `distinct_id` and `event`, then either log it to Kafka
       (and, unless plugin-server ingestion is on, process it inline) when EE is
       enabled, or send it to the matching Celery task.

    Returns a CORS-wrapped JsonResponse: `{"status": 1}` on success, otherwise a
    validation error body.
    """
    # NOTE(review): the timer is only stopped on the success path at the bottom;
    # the early validation returns leave it running.
    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX, ))
    timer.start()
    now = timezone.now()
    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return _validation_error_response(
            request,
            "Malformed request data. Make sure you're sending valid JSON.",
            400,
        )
    if not data:
        return _validation_error_response(
            request,
            "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
            400,
        )
    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)

    if not token:
        return _validation_error_response(
            request,
            "API key not provided. You can find your project API key in PostHog project settings.",
            401,
        )
    team = Team.objects.get_team_from_token(token)

    if team is None:
        # The token is not a project API key: treat it as a personal API key,
        # which additionally requires a project ID.
        try:
            project_id = _get_project_id(data, request)
        except Exception:
            # Was a bare `except:`, which also swallowed SystemExit / KeyboardInterrupt.
            return _validation_error_response(request, "Invalid project ID.", 400)
        if not project_id:
            return _validation_error_response(
                request,
                "Project API key invalid. You can find your project API key in PostHog project settings.",
                401,
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return _validation_error_response(request, "Personal API key invalid.", 401)
        # NOTE(review): this lookup is unguarded; confirm what should happen when the
        # user has no team with this project ID.
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return _validation_error_response(
                request,
                "You need to set user distinct ID field `distinct_id`.",
                400,
                item=event,
            )
        if not event.get("event"):
            return _validation_error_response(
                request,
                "You need to set event name field `event`.",
                400,
                item=event,
            )

        if not event.get("properties"):
            event["properties"] = {}

        _ensure_web_feature_flags_in_properties(event, team, distinct_id)

        event_uuid = UUIDT()

        if is_ee_enabled():
            log_topics = [KAFKA_EVENTS_WAL]

            if settings.PLUGIN_SERVER_INGESTION:
                log_topics.append(KAFKA_EVENTS_PLUGIN_INGESTION)
                statsd.Counter("%s_posthog_cloud_plugin_server_ingestion" %
                               (settings.STATSD_PREFIX, )).increment()

            log_event(
                distinct_id=distinct_id,
                ip=get_ip_address(request),
                site_url=request.build_absolute_uri("/")[:-1],
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
                event_uuid=event_uuid,
                topics=log_topics,
            )

            # must done after logging because process_event_ee modifies the event, e.g. by removing $elements
            if not settings.PLUGIN_SERVER_INGESTION:
                process_event_ee(
                    distinct_id=distinct_id,
                    ip=get_ip_address(request),
                    site_url=request.build_absolute_uri("/")[:-1],
                    data=event,
                    team_id=team.id,
                    now=now,
                    sent_at=sent_at,
                    event_uuid=event_uuid,
                )
        else:
            task_name = "posthog.tasks.process_event.process_event"
            if settings.PLUGIN_SERVER_INGESTION or team.plugins_opt_in:
                task_name += "_with_plugins"
                celery_queue = settings.PLUGINS_CELERY_QUEUE
            else:
                celery_queue = settings.CELERY_DEFAULT_QUEUE

            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[
                    distinct_id,
                    get_ip_address(request),
                    request.build_absolute_uri("/")[:-1],
                    event,
                    team.id,
                    now.isoformat(),
                    sent_at,
                ],
            )
    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))


def _validation_error_response(request, message, status, **extra):
    """Build the CORS-wrapped JSON reply used for every validation failure in get_event.

    `extra` entries (e.g. `item=event`) are appended to the body after "code" and "message".
    """
    return cors_response(
        request,
        JsonResponse({"code": "validation", "message": message, **extra}, status=status),
    )
Example #30
0
def _create_anonymous_users(team: Team, base_url: str) -> None:
    """Populate ``team`` with demo persons, distinct ids and browsing events.

    Steps, in order:

    1. Load ``posthog/demo_data.json`` (resolved against the current working
       directory).
    2. Bulk-create 100 ``Person`` rows flagged ``{"is_demo": True}``.
    3. Walk every person belonging to ``team`` and, for each one:
       * register a fresh distinct id (the first person gets three extra);
       * queue a ``$pageview`` on ``base_url``;
       * every 3rd person is marked identified, merged with the next
         ``demo_data`` entry, clicks "Sign up" and views ``<base_url>/1``;
       * of those, every 4th also clicks "Sign up!" and views ``<base_url>/2``;
       * of those, every 5th also clicks "Pay $10", fires a ``purchase``
         event and views ``<base_url>/3``.
    4. Append ``"purchase"`` to ``team.event_properties_numerical``, save the
       team, then bulk-insert the queued distinct ids and events.

    ``$autocapture`` events are written immediately through
    ``Event.objects.create`` (they carry ``elements``); all other events are
    queued and inserted in one ``bulk_create`` at the end.

    Args:
        team: Team that owns all created persons, distinct ids and events.
        base_url: URL used as ``$current_url`` for the landing page; the
            follow-up pages are ``<base_url>/1`` .. ``<base_url>/3``.
    """
    demo_data_path = Path("posthog/demo_data.json").resolve()
    with open(demo_data_path, "r") as demo_data_file:
        demo_data = json.load(demo_data_file)

    Person.objects.bulk_create([Person(team=team, properties={"is_demo": True}) for _ in range(0, 100)])

    queued_distinct_ids: List[PersonDistinctId] = []
    queued_events: List[Event] = []

    def page_properties(url: str, browser_name: str) -> dict:
        # Properties shared by every web event emitted for a given page.
        return {"$current_url": url, "$browser": browser_name, "$lib": "web"}

    def click_properties(url: str, browser_name: str) -> dict:
        # Same as a page view, tagged as a click for $autocapture.
        return {**page_properties(url, browser_name), "$event_type": "click"}

    def element_chain(clicked: Element) -> List[Element]:
        # The clicked element followed by its (fixed) ancestors up to <html>.
        return [
            clicked,
            Element(tag_name="form", attr_class=["form"]),
            Element(tag_name="div", attr_class=["container"]),
            Element(tag_name="body"),
            Element(tag_name="html"),
        ]

    signup_url = f"{base_url}/1"
    checkout_url = f"{base_url}/2"
    thanks_url = f"{base_url}/3"

    days_ago = 7
    next_demo_entry = 0

    # NOTE(review): this iterates *all* persons of the team, not only the 100
    # created above — presumably the team is expected to be empty beforehand;
    # confirm against the caller.
    for index, person in enumerate(Person.objects.filter(team=team)):
        # Move one day closer to "now" after every block of 14 persons.
        if index > 0 and index % 14 == 0:
            days_ago -= 1

        distinct_id = str(UUIDT())
        queued_distinct_ids.append(PersonDistinctId(team=team, person=person, distinct_id=distinct_id))

        # The very first person gets three additional distinct ids.
        if index == 0:
            queued_distinct_ids.extend(
                PersonDistinctId(team=team, person=person, distinct_id=str(UUIDT())) for _ in range(0, 3)
            )

        visit_start = now() - relativedelta(days=days_ago)
        browser = random.choice(["Chrome", "Safari", "Firefox"])

        queued_events.append(
            Event(
                team=team,
                event="$pageview",
                distinct_id=distinct_id,
                properties=page_properties(base_url, browser),
                timestamp=visit_start,
            )
        )

        # --- Step 1: only every 3rd person signs up -------------------------
        if index % 3 != 0:
            continue

        # NOTE(review): raises IndexError if demo_data has fewer entries than
        # identified persons — verify the size of demo_data.json.
        person.properties.update(demo_data[next_demo_entry])
        person.is_identified = True
        person.save()
        next_demo_entry += 1

        Event.objects.create(
            team=team,
            distinct_id=distinct_id,
            event="$autocapture",
            properties=click_properties(base_url, browser),
            timestamp=visit_start + relativedelta(seconds=14),
            elements=element_chain(
                Element(
                    tag_name="a",
                    href="/demo/1",
                    attr_class=["btn", "btn-success"],
                    attr_id="sign-up",
                    text="Sign up",
                )
            ),
        )
        queued_events.append(
            Event(
                event="$pageview",
                team=team,
                distinct_id=distinct_id,
                properties=page_properties(signup_url, browser),
                timestamp=visit_start + relativedelta(seconds=15),
            )
        )

        # --- Step 2: of those, only every 4th submits the sign-up form ------
        if index % 4 != 0:
            continue

        Event.objects.create(
            team=team,
            event="$autocapture",
            distinct_id=distinct_id,
            properties=click_properties(signup_url, browser),
            timestamp=visit_start + relativedelta(seconds=29),
            elements=element_chain(
                Element(tag_name="button", attr_class=["btn", "btn-success"], text="Sign up!")
            ),
        )
        queued_events.append(
            Event(
                event="$pageview",
                team=team,
                distinct_id=distinct_id,
                properties=page_properties(checkout_url, browser),
                timestamp=visit_start + relativedelta(seconds=30),
            )
        )

        # --- Step 3: of those, only every 5th completes a purchase ----------
        if index % 5 != 0:
            continue

        Event.objects.create(
            team=team,
            event="$autocapture",
            distinct_id=distinct_id,
            properties=click_properties(checkout_url, browser),
            timestamp=visit_start + relativedelta(seconds=59),
            elements=element_chain(
                Element(tag_name="button", attr_class=["btn", "btn-success"], text="Pay $10")
            ),
        )
        purchase_time = visit_start + relativedelta(seconds=60)
        queued_events.append(
            Event(
                event="purchase",
                team=team,
                distinct_id=distinct_id,
                properties={"price": 10},
                timestamp=purchase_time,
            )
        )
        queued_events.append(
            Event(
                event="$pageview",
                team=team,
                distinct_id=distinct_id,
                properties=page_properties(thanks_url, browser),
                timestamp=purchase_time,
            )
        )

    # NOTE(review): "purchase" is the event name while the numeric property
    # emitted above is "price" — confirm which one is intended here.
    team.event_properties_numerical.append("purchase")
    team.save()

    PersonDistinctId.objects.bulk_create(queued_distinct_ids)
    Event.objects.bulk_create(queued_events)