def _capture_ee(
    event_uuid: UUID,
    person_uuid: UUID,
    ip: str,
    site_url: str,
    team_id: int,
    event: str,
    distinct_id: str,
    properties: Dict,
    timestamp: datetime.datetime,
) -> None:
    """Convert a captured payload into ``Element`` rows and store the event.

    A truthy ``$elements`` entry is removed from ``properties`` (the caller's
    dict is mutated) and each raw element dict is turned into an ``Element``.
    Element text is capped at 400 characters and href at 2048.  The request
    IP is written to ``$ip`` unless the team has ``anonymize_ips`` set or the
    payload already contains an ``$ip`` value.

    ``person_uuid`` and ``site_url`` are accepted but not used in this body.
    """
    elements_list = []
    raw_elements = properties.get("$elements")
    if raw_elements:
        # Elements are stored separately from the event properties.
        del properties["$elements"]
        for raw in raw_elements:
            # Values are computed in the same order the Element fields are
            # listed so malformed payloads fail on the same field.
            text = raw["$el_text"][0:400] if raw.get("$el_text") else None
            tag_name = raw["tag_name"]
            href = raw["attr__href"][0:2048] if raw.get("attr__href") else None
            attr_class = raw["attr__class"].split(" ") if raw.get("attr__class") else None
            # Every "attr__*" key is also kept verbatim in the attributes map.
            prefixed_attributes = {
                name: content for name, content in raw.items() if name.startswith("attr__")
            }
            elements_list.append(
                Element(
                    text=text,
                    tag_name=tag_name,
                    href=href,
                    attr_class=attr_class,
                    attr_id=raw.get("attr__id"),
                    nth_child=raw.get("nth_child"),
                    nth_of_type=raw.get("nth_of_type"),
                    attributes=prefixed_attributes,
                )
            )

    # Only the columns read here (and by the helpers called below) are loaded.
    team = Team.objects.only(
        "slack_incoming_webhook", "event_names", "event_properties", "anonymize_ips"
    ).get(pk=team_id)

    ip_may_be_stored = not team.anonymize_ips
    if ip_may_be_stored and "$ip" not in properties:
        properties["$ip"] = ip

    store_names_and_properties(team=team, event=event, properties=properties)

    create_event(
        event_uuid=event_uuid,
        event=event,
        properties=properties,
        timestamp=timestamp,
        team=team,
        distinct_id=distinct_id,
        elements=elements_list,
    )
def _capture_ee(
    event_uuid: UUID,
    person_uuid: UUID,
    ip: str,
    site_url: str,
    team_id: int,
    event: str,
    distinct_id: str,
    properties: Dict,
    timestamp: datetime.datetime,
) -> None:
    """Store a captured event, creating the person for ``distinct_id`` if needed.

    A truthy ``$elements`` entry is removed from ``properties`` (the caller's
    dict is mutated) and each raw element dict is turned into an ``Element``.
    Element text is capped at 400 characters and href at 2048.  The request
    IP is written to ``$ip`` unless the team has ``anonymize_ips`` set or the
    payload already contains an ``$ip`` value.  ``site_url`` is forwarded to
    ``create_event``.

    ``person_uuid`` is accepted but not used in this body.
    """
    elements_list = []
    raw_elements = properties.get("$elements")
    if raw_elements:
        # Elements are stored separately from the event properties.
        del properties["$elements"]
        for raw in raw_elements:
            # Values are computed in the same order the Element fields are
            # listed so malformed payloads fail on the same field.
            text = raw["$el_text"][0:400] if raw.get("$el_text") else None
            tag_name = raw["tag_name"]
            href = raw["attr__href"][0:2048] if raw.get("attr__href") else None
            attr_class = raw["attr__class"].split(" ") if raw.get("attr__class") else None
            # Every "attr__*" key is also kept verbatim in the attributes map.
            prefixed_attributes = {
                name: content for name, content in raw.items() if name.startswith("attr__")
            }
            elements_list.append(
                Element(
                    text=text,
                    tag_name=tag_name,
                    href=href,
                    attr_class=attr_class,
                    attr_id=raw.get("attr__id"),
                    nth_child=raw.get("nth_child"),
                    nth_of_type=raw.get("nth_of_type"),
                    attributes=prefixed_attributes,
                )
            )

    # Only the columns read here (and by the helpers called below) are loaded.
    team = Team.objects.only(
        "slack_incoming_webhook", "event_names", "event_properties", "anonymize_ips"
    ).get(pk=team_id)

    ip_may_be_stored = not team.anonymize_ips
    if ip_may_be_stored and "$ip" not in properties:
        properties["$ip"] = ip

    store_names_and_properties(team=team, event=event, properties=properties)

    person_known = Person.objects.distinct_ids_exist(
        team_id=team_id, distinct_ids=[str(distinct_id)]
    )
    if not person_known:
        try:
            Person.objects.create(team_id=team_id, distinct_ids=[str(distinct_id)])
        except IntegrityError:
            # Race condition: between the existence check and the insert,
            # another request already created this user.  Nothing left to do.
            pass

    create_event(
        event_uuid=event_uuid,
        event=event,
        properties=properties,
        timestamp=timestamp,
        team=team,
        distinct_id=distinct_id,
        elements=elements_list,
        site_url=site_url,
    )
def test_prop_selector_tag_name(self):
    """Element filters on ``selector`` and ``tag_name`` match the expected events."""

    def element_chain(button_class, leaf_tag):
        # Both events share the same chain apart from the button's second
        # class and the tag of the innermost "nested" element.
        return [
            Element(
                tag_name="a",
                href="/a-url",
                attr_class=["small"],
                text="bla bla",
                attributes={},
                nth_child=1,
                nth_of_type=0,
            ),
            Element(tag_name="button", attr_class=["btn", button_class], nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0),
            Element(tag_name=leaf_tag, nth_child=0, nth_of_type=0, attr_id="nested"),
        ]

    _create_event(
        event="$autocapture",
        team=self.team,
        distinct_id="whatever",
        properties={"attr": "some_other_val"},
        elements=element_chain("btn-primary", "label"),
    )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="whatever",
        properties={"attr": "some_val"},
        elements=element_chain("btn-secondary", "img"),
    )

    # (property key, filter value, number of matching events)
    exact_match_cases = [
        # selector
        ("selector", [".btn"], 2),
        ("selector", ".btn", 2),
        ("selector", [".btn-primary"], 1),
        ("selector", [".btn-secondary"], 1),
        ("selector", [".btn-primary", ".btn-secondary"], 2),
        # tag_name
        ("tag_name", ["div"], 2),
        ("tag_name", "div", 2),
        ("tag_name", ["img"], 1),
        ("tag_name", ["label"], 1),
        ("tag_name", ["img", "label"], 2),
    ]
    for prop_key, prop_value, expected_count in exact_match_cases:
        element_filter = Filter(
            data={
                "properties": [
                    {"key": prop_key, "value": prop_value, "operator": "exact", "type": "element"}
                ]
            }
        )
        self.assertEqual(len(self._run_query(element_filter)), expected_count)
def test_prop_element(self):
    """Element filters on selector, tag_name, href and text honour every operator."""

    def chain_tail(button_class, leaf_tag):
        # Elements that follow the leading anchor in the first two events.
        return [
            Element(tag_name="button", attr_class=["btn", button_class], nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0),
            Element(tag_name=leaf_tag, nth_child=0, nth_of_type=0, attr_id="nested"),
        ]

    def element_prop(key, value, operator):
        return {"key": key, "value": value, "operator": operator, "type": "element"}

    _create_event(
        event="$autocapture",
        team=self.team,
        distinct_id="whatever",
        properties={"attr": "some_other_val"},
        elements=[
            Element(
                tag_name="a",
                href="/a-url",
                attr_class=["small"],
                text="bla bla",
                nth_child=1,
                nth_of_type=0,
            ),
            *chain_tail("btn-primary", "label"),
        ],
    )
    _create_event(
        event="$autocapture",
        team=self.team,
        distinct_id="whatever",
        properties={"attr": "some_val"},
        elements=[
            Element(
                tag_name="a",
                href="/a-url",
                attr_class=["small"],
                text='bla"bla',
                attributes={},
                nth_child=1,
                nth_of_type=0,
            ),
            *chain_tail("btn-secondary", "img"),
        ],
    )
    # Third event: different href, no text, and a shorter chain.
    _create_event(
        event="$autocapture",
        team=self.team,
        distinct_id="whatever",
        elements=[
            Element(tag_name="a", href="/789", nth_child=0, nth_of_type=0),
            Element(tag_name="button", attr_class=["btn", "btn-tertiary"], nth_child=0, nth_of_type=0),
        ],
    )

    # (list of property filters, number of matching events)
    cases = [
        # selector
        ([element_prop("selector", [".btn"], "exact")], 3),
        ([element_prop("selector", ".btn", "exact")], 3),
        ([element_prop("selector", [".btn-primary"], "exact")], 1),
        ([element_prop("selector", [".btn-secondary"], "exact")], 1),
        ([element_prop("selector", [".btn-primary", ".btn-secondary"], "exact")], 2),
        ([element_prop("selector", [], "exact")], 0),
        ([element_prop("selector", [], "is_not")], 3),
        # tag_name
        ([element_prop("tag_name", ["div"], "exact")], 2),
        ([element_prop("tag_name", "div", "exact")], 2),
        ([element_prop("tag_name", ["img"], "exact")], 1),
        ([element_prop("tag_name", ["label"], "exact")], 1),
        ([element_prop("tag_name", ["img", "label"], "exact")], 2),
        # href/text
        ([element_prop("href", ["/a-url"], "exact")], 2),
        ([element_prop("href", ["/a-url", "/789"], "exact")], 3),
        ([element_prop("href", [], "exact")], 0),
        ([element_prop("href", ["/a-url"], "is_not")], 1),
        ([element_prop("href", ["/a-url", "/789"], "is_not")], 0),
        ([element_prop("href", [], "is_not")], 3),
        (
            [
                # This href filter carries no "operator" key at all.
                {"key": "href", "value": ["/a-url"], "type": "element"},
                element_prop("tag_name", ["marquee"], "is_not"),
            ],
            2,
        ),
        ([element_prop("href", ["UrL"], "icontains")], 2),
        ([element_prop("href", "/a-.+", "regex")], 2),
        ([element_prop("href", r"/\d+", "not_regex")], 2),
        ([element_prop("text", 'bla"bla', "icontains")], 1),
        ([element_prop("text", "is_set", "is_set")], 2),
        ([element_prop("text", "is_not_set", "is_not_set")], 1),
    ]
    for property_filters, expected_count in cases:
        element_filter = Filter(data={"properties": property_filters})
        self.assertEqual(len(self._run_query(element_filter)), expected_count)
def test_element(self):
    """Running a query with an element filter (populated or empty) does not raise."""

    def element_chain(button_class, leaf_tag):
        # Both events share the same chain apart from the button's second
        # class and the tag of the innermost "nested" element.
        return [
            Element(
                tag_name="a",
                href="/a-url",
                attr_class=["small"],
                text="bla bla",
                attributes={},
                nth_child=1,
                nth_of_type=0,
            ),
            Element(tag_name="button", attr_class=["btn", button_class], nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0),
            Element(tag_name=leaf_tag, nth_child=0, nth_of_type=0, attr_id="nested"),
        ]

    _create_event(
        event="$autocapture",
        team=self.team,
        distinct_id="whatever",
        properties={"attr": "some_other_val"},
        elements=element_chain("btn-primary", "label"),
    )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="whatever",
        properties={"attr": "some_val"},
        elements=element_chain("btn-secondary", "img"),
    )

    label_filter = Filter(
        data={
            "events": [
                {"id": "event_name", "order": 0},
            ],
            "properties": [
                {"key": "tag_name", "value": ["label"], "operator": "exact", "type": "element"}
            ],
        }
    )
    # No assertions on the results: the queries only have to execute.
    self._run_query(label_filter)

    empty_value_filter = label_filter.with_data(
        {
            "properties": [
                {"key": "tag_name", "value": [], "operator": "exact", "type": "element"}
            ],
        }
    )
    self._run_query(empty_value_filter)
def test_funnel_correlation_with_event_properties_autocapture(self):
    """The correlation API reports ``$autocapture`` events by their elements chain."""
    self.client.force_login(self.user)

    # Need a minimum of 3 hits to get a correlation result
    for user_number in range(3):
        converting_user = f"user_{user_number}"
        create_person(distinct_ids=[converting_user], team_id=self.team.pk)
        _create_event(
            team=self.team,
            event="user signed up",
            distinct_id=converting_user,
            timestamp="2020-01-02T14:00:00Z",
        )
        _create_event(
            team=self.team,
            event="$autocapture",
            distinct_id=converting_user,
            elements=[Element(nth_of_type=1, nth_child=0, tag_name="a", href="/movie")],
            timestamp="2020-01-03T14:00:00Z",
            properties={"signup_source": "email", "$event_type": "click"},
        )
        _create_event(
            team=self.team,
            event="paid",
            distinct_id=converting_user,
            timestamp="2020-01-04T14:00:00Z",
        )

    # At least one person that fails, to ensure we get results
    dropped_user = "user_fail"
    create_person(distinct_ids=[dropped_user], team_id=self.team.pk)
    _create_event(
        team=self.team,
        event="user signed up",
        distinct_id=dropped_user,
        timestamp="2020-01-02T14:00:00Z",
    )

    with freeze_time("2020-01-01"):
        response = get_funnel_correlation_ok(
            client=self.client,
            team_id=self.team.pk,
            request=FunnelCorrelationRequest(
                events=json.dumps([EventPattern(id="user signed up"), EventPattern(id="paid")]),
                date_to="2020-01-14",
                date_from="2020-01-01",
                funnel_correlation_type=FunnelCorrelationType.EVENT_WITH_PROPERTIES,
                funnel_correlation_event_names=json.dumps(["$autocapture"]),
            ),
        )

    # Serialized form of the single <a href="/movie"> element created above.
    movie_link = {
        "event": None,
        "text": None,
        "tag_name": "a",
        "attr_class": None,
        "href": "/movie",
        "attr_id": None,
        "nth_child": 0,
        "nth_of_type": 1,
        "attributes": {},
        "order": 0,
    }
    expected_correlation = {
        "success_count": 3,
        "failure_count": 0,
        "success_people_url": ANY,
        "failure_people_url": ANY,
        "odds_ratio": 8.0,
        "correlation_type": "success",
        "event": {
            "event": '$autocapture::elements_chain::click__~~__a:href="/movie"nth-child="0"nth-of-type="1"',
            "properties": {"$event_type": "click"},
            "elements": [movie_link],
        },
    }
    assert response == {
        "result": {
            "events": [expected_correlation],
            "skewed": False,
        },
        "last_refresh": "2020-01-01T00:00:00Z",
        "is_cached": False,
    }

    first_correlation = response["result"]["events"][0]
    assert get_people_for_correlation_ok(client=self.client, correlation=first_correlation) == {
        "success": ["user_0", "user_1", "user_2"],
        "failure": [],
    }
def create_anonymous_users_ch(team: Team, base_url: str) -> None:
    """Create 100 demo persons for ``team`` with a browsing/purchase event trail.

    Every person gets an initial pageview of ``base_url``.  Every third person
    is identified with a profile from ``posthog/demo_data.json`` and clicks
    through to page 1; of those, every fourth continues to page 2, and of
    those, every fifth pays and reaches page 3.  Finally "purchase" is
    appended to ``team.event_properties_numerical`` and the team is saved.
    """
    with open(Path("posthog/demo_data.json").resolve(), "r") as demo_data_file:
        demo_profiles = json.load(demo_data_file)

    def web_properties(url, browser):
        return {"$current_url": url, "$browser": browser, "$lib": "web"}

    def emit_pageview(distinct_id, browser, url, timestamp):
        event_uuid = uuid4()
        create_event(
            event="$pageview",
            team=team,
            distinct_id=distinct_id,
            properties=web_properties(url, browser),
            timestamp=timestamp,
            event_uuid=event_uuid,
        )

    def emit_click(distinct_id, browser, url, timestamp, clicked):
        # The clicked element always sits inside the same form/div/body/html chain.
        elements = [
            clicked,
            Element(tag_name="form", attr_class=["form"]),
            Element(tag_name="div", attr_class=["container"]),
            Element(tag_name="body"),
            Element(tag_name="html"),
        ]
        event_uuid = uuid4()
        elements_hash = create_elements(elements=elements, team=team, event_uuid=event_uuid)
        click_properties = web_properties(url, browser)
        click_properties["$event_type"] = "click"
        create_event(
            team=team,
            event="$autocapture",
            distinct_id=distinct_id,
            properties=click_properties,
            timestamp=timestamp,
            elements_hash=elements_hash,
            event_uuid=event_uuid,
        )

    next_profile = 0
    for index in range(100):
        # Starts 7 days back and moves one day closer to now every 14 persons.
        days_ago = 7 - index // 14
        date = now() - relativedelta(days=days_ago)
        browser = random.choice(["Chrome", "Safari", "Firefox"])

        distinct_id = generate_clickhouse_uuid()
        person = Person.objects.create(
            team_id=team.pk, distinct_ids=[distinct_id], properties={"is_demo": True}
        )

        emit_pageview(distinct_id, browser, base_url, date)

        if index % 3 != 0:
            continue

        update_person_properties(
            team_id=team.pk, id=person.uuid, properties=demo_profiles[next_profile]
        )
        update_person_is_identified(team_id=team.pk, id=person.uuid, is_identified=True)
        next_profile += 1

        emit_click(
            distinct_id,
            browser,
            base_url,
            date + relativedelta(seconds=14),
            Element(
                tag_name="a",
                href="/demo/1",
                attr_class=["btn", "btn-success"],
                attr_id="sign-up",
                text="Sign up",
            ),
        )
        emit_pageview(distinct_id, browser, f"{base_url}/1", date + relativedelta(seconds=15))

        if index % 4 != 0:
            continue

        emit_click(
            distinct_id,
            browser,
            f"{base_url}/1",
            date + relativedelta(seconds=29),
            Element(tag_name="button", attr_class=["btn", "btn-success"], text="Sign up!"),
        )
        emit_pageview(distinct_id, browser, f"{base_url}/2", date + relativedelta(seconds=30))

        if index % 5 != 0:
            continue

        emit_click(
            distinct_id,
            browser,
            f"{base_url}/2",
            date + relativedelta(seconds=59),
            Element(tag_name="button", attr_class=["btn", "btn-success"], text="Pay $10"),
        )

        purchase_uuid = uuid4()
        create_event(
            event="purchase",
            team=team,
            distinct_id=distinct_id,
            properties={"price": 10},
            timestamp=date + relativedelta(seconds=60),
            event_uuid=purchase_uuid,
        )

        emit_pageview(distinct_id, browser, f"{base_url}/3", date + relativedelta(seconds=60))

    team.event_properties_numerical.append("purchase")
    team.save()
def test_funnel_correlation_with_event_properties_autocapture(self):
    """``$autocapture`` correlations are keyed by elements chain and people can be fetched per element."""

    def element_prop(key, value):
        return {"key": key, "operator": "exact", "type": "element", "value": value}

    filter_data = {
        "events": [
            {"id": "user signed up", "type": "events", "order": 0},
            {"id": "paid", "type": "events", "order": 1},
        ],
        "insight": INSIGHT_FUNNELS,
        "date_from": "2020-01-01",
        "date_to": "2020-01-14",
        "funnel_correlation_type": "event_with_properties",
        "funnel_correlation_event_names": ["$autocapture"],
    }
    funnel_filter = Filter(data=filter_data)
    correlation = FunnelCorrelation(funnel_filter, self.team)

    # Need a minimum of 3 hits to get a correlation result
    for user_number in range(6):
        converting_user = f"user_{user_number}"
        _create_person(distinct_ids=[converting_user], team_id=self.team.pk)
        _create_event(
            team=self.team,
            event="user signed up",
            distinct_id=converting_user,
            timestamp="2020-01-02T14:00:00Z",
        )
        _create_event(
            team=self.team,
            event="$autocapture",
            distinct_id=converting_user,
            elements=[Element(nth_of_type=1, nth_child=0, tag_name="a", href="/movie")],
            timestamp="2020-01-03T14:00:00Z",
            properties={"signup_source": "email", "$event_type": "click"},
        )
        # Test two different types of autocapture elements, with different
        # counts, so we can accurately test results
        is_even_user = user_number % 2 == 0
        if is_even_user:
            _create_event(
                team=self.team,
                event="$autocapture",
                distinct_id=converting_user,
                elements=[Element(nth_of_type=1, nth_child=0, tag_name="button", text="Pay $10")],
                timestamp="2020-01-03T14:00:00Z",
                properties={"signup_source": "facebook", "$event_type": "submit"},
            )

        _create_event(
            team=self.team,
            event="paid",
            distinct_id=converting_user,
            timestamp="2020-01-04T14:00:00Z",
        )

    # At least one person that fails, to ensure we get results
    dropped_user = "user_fail"
    _create_person(distinct_ids=[dropped_user], team_id=self.team.pk)
    _create_event(
        team=self.team,
        event="user signed up",
        distinct_id=dropped_user,
        timestamp="2020-01-02T14:00:00Z",
    )

    result = correlation._run()[0]

    # $autocapture results only return elements chain
    expected_result = [
        {
            "event": '$autocapture::elements_chain::click__~~__a:href="/movie"nth-child="0"nth-of-type="1"',
            "success_count": 6,
            "failure_count": 0,
            "odds_ratio": 14.0,
            "correlation_type": "success",
        },
        {
            "event": '$autocapture::elements_chain::submit__~~__button:nth-child="0"nth-of-type="1"text="Pay $10"',
            "success_count": 3,
            "failure_count": 0,
            "odds_ratio": 2.0,
            "correlation_type": "success",
        },
    ]
    self.assertEqual(result, expected_result)

    facebook_signups = self._get_people_for_event(
        funnel_filter, "$autocapture", {"signup_source": "facebook"}
    )
    self.assertEqual(len(facebook_signups), 3)

    clickers = self._get_people_for_event(funnel_filter, "$autocapture", {"$event_type": "click"})
    self.assertEqual(len(clickers), 6)

    pay_button_people = self._get_people_for_event(
        funnel_filter,
        "$autocapture",
        [element_prop("tag_name", "button"), element_prop("text", "Pay $10")],
    )
    self.assertEqual(len(pay_button_people), 3)

    movie_link_people = self._get_people_for_event(
        funnel_filter,
        "$autocapture",
        [element_prop("tag_name", "a"), element_prop("href", "/movie")],
    )
    self.assertEqual(len(movie_link_people), 6)