def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None:
    """Create the ``FunnelCorrelation`` helper, then run the parent initializer.

    Args:
        filter: Passed to ``FunnelCorrelation`` and to the parent initializer.
        team: Passed to ``FunnelCorrelation`` and to the parent initializer.
        base_uri: Forwarded to ``FunnelCorrelation`` as ``base_uri``.
    """
    # The helper is attached before the parent initializer is invoked.
    correlation = FunnelCorrelation(filter, team, base_uri=base_uri)
    self._funnel_correlation = correlation
    super().__init__(team, filter)
def test_correlation_with_properties_raises_validation_error(self):
    """Running a correlation without the names it requires raises ``ValidationError``."""
    funnel_steps = [
        {"id": "user signed up", "type": "events", "order": 0},
        {"id": "paid", "type": "events", "order": 1},
    ]
    filter = Filter(
        data={
            "events": funnel_steps,
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "properties",
            # "funnel_correlation_names": ["$browser"] is deliberately missing
        }
    )
    correlation = FunnelCorrelation(filter, self.team)

    distinct_id = "user_1"
    _create_person(distinct_ids=[distinct_id], team_id=self.team.pk, properties={"$browser": "Positive"})
    timeline = (
        ("user signed up", "2020-01-02T14:00:00Z"),
        ("rick", "2020-01-03T14:00:00Z"),
        ("paid", "2020-01-04T14:00:00Z"),
    )
    for event_name, timestamp in timeline:
        _create_event(team=self.team, event=event_name, distinct_id=distinct_id, timestamp=timestamp)

    with self.assertRaises(ValidationError):
        correlation._run()

    # "funnel_correlation_event_names": ["rick"] is deliberately missing
    filter = filter.with_data({"funnel_correlation_type": "event_with_properties"})
    with self.assertRaises(ValidationError):
        FunnelCorrelation(filter, self.team)._run()
def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None:
    """Create the ``FunnelCorrelation`` helper from a filter extended with the correlation properties.

    Args:
        filter: Source filter; the parent initializer receives it unchanged.
        team: Passed to ``FunnelCorrelation`` and to the parent initializer.
        base_uri: Forwarded to ``FunnelCorrelation`` as ``base_uri``.
    """
    # Filtering on persons / groups properties can be pushed down to funnel_actors CTE
    property_groups = filter.property_groups
    combined_properties = property_groups.combine_properties(
        PropertyOperatorType.AND,
        filter.correlation_property_values or [],
    )
    pushed_down_filter = filter.with_data({"properties": combined_properties.to_dict()})

    self._funnel_correlation = FunnelCorrelation(pushed_down_filter, team, base_uri=base_uri)
    super().__init__(team, filter)
def track_correlations_by_events(self):
    """Run ``FunnelCorrelation`` for a two-step funnel with no explicit correlation type."""
    funnel_data = {
        "events": [{"id": "user signed up"}, {"id": "insight analyzed"}],
        **SHORT_DATE_RANGE,
    }
    benchmark_filter = Filter(data=funnel_data, team=self.team)
    FunnelCorrelation(benchmark_filter, self.team).run()
def calculate_funnel_correlation(self, request: Request) -> Dict[str, Any]:
    """Run the funnel correlation described by ``request`` for ``self.team``.

    Returns:
        A dict whose only key, ``"result"``, holds the value returned by
        ``FunnelCorrelation.run()``.
    """
    team = self.team
    correlation = FunnelCorrelation(filter=Filter(request=request), team=team)
    return {"result": correlation.run()}
def calculate_funnel_correlation(self, request: Request) -> Dict[str, Any]:
    """Run the funnel correlation described by ``request`` for ``self.team``.

    The absolute URI of "/" for this request is handed to ``FunnelCorrelation``
    as ``base_uri``.

    Returns:
        A dict whose only key, ``"result"``, holds the value returned by
        ``FunnelCorrelation.run()``.
    """
    team = self.team
    correlation_filter = Filter(request=request)
    correlation = FunnelCorrelation(
        filter=correlation_filter,
        team=team,
        base_uri=request.build_absolute_uri("/"),
    )
    return {"result": correlation.run()}
class _FunnelPropertyCorrelationActors(ActorBaseQuery):
    """Builds the actors query for a property correlation.

    The filter's correlation property values are AND-combined into its property
    groups before ``FunnelCorrelation`` is built, so the restriction is applied
    inside the ``funnel_actors`` CTE rather than in the outer query.
    """

    _filter: Filter

    def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None:
        """Create the correlation helper from the extended filter, then init the base query.

        Args:
            filter: Source filter; the base class receives it unchanged.
            team: Team the query is built for.
            base_uri: Forwarded to ``FunnelCorrelation`` as ``base_uri``.
        """
        # Filtering on persons / groups properties can be pushed down to funnel_actors CTE
        new_correlation_filter = filter.with_data(
            {
                "properties": filter.property_groups.combine_properties(
                    PropertyOperatorType.AND, filter.correlation_property_values or []
                ).to_dict()
            }
        )
        self._funnel_correlation = FunnelCorrelation(new_correlation_filter, team, base_uri=base_uri)
        super().__init__(team, filter)

    @cached_property
    def aggregation_group_type_index(self):
        """Return the filter's ``aggregation_group_type_index``."""
        return self._filter.aggregation_group_type_index

    def actor_query(self, limit_actors: Optional[bool] = True, extra_fields: Optional[List[str]] = None):
        """Build the SQL and parameters selecting the actor ids for this correlation.

        Args:
            limit_actors: When truthy, ``LIMIT``/``OFFSET`` placeholders are added to the query.
            extra_fields: Accepted but not used by this implementation.

        Returns:
            A ``(query, params)`` tuple.

        Raises:
            ValidationError: If the filter has no correlation property values.
        """
        if not self._filter.correlation_property_values:
            raise ValidationError("Property Correlation expects atleast one Property to get persons for")

        funnel_persons_query, funnel_persons_params = self._funnel_correlation.get_funnel_actors_cte()

        converted = self._filter.correlation_persons_converted
        # Emit the WHERE clause only when there is a condition: an empty condition
        # after a literal "WHERE" would leave "WHERE GROUP BY", which is not valid SQL.
        where_clause = (
            f'WHERE funnel_actors.steps {"=" if converted else "<>"} target_step' if converted is not None else ""
        )

        recording_event_select_statement = (
            ", any(funnel_actors.matching_events) AS matching_events" if self._filter.include_recordings else ""
        )

        query = f"""
            WITH
                funnel_actors AS ({funnel_persons_query}),
                %(target_step)s AS target_step
            SELECT
                funnel_actors.actor_id AS actor_id
                {recording_event_select_statement}
            FROM funnel_actors
            {where_clause}
            GROUP BY funnel_actors.actor_id
            ORDER BY actor_id
            {"LIMIT %(limit)s" if limit_actors else ""}
            {"OFFSET %(offset)s" if limit_actors else ""}
        """
        params = {
            **funnel_persons_params,
            "target_step": len(self._filter.entities),
            "limit": self._filter.correlation_person_limit,
            "offset": self._filter.correlation_person_offset,
        }

        return query, params
def track_correlations_by_event_properties_materialized(self):
    """Run an ``EVENT_WITH_PROPERTIES`` funnel correlation for the ``$autocapture`` event."""
    funnel_data = {
        "events": [{"id": "user signed up"}, {"id": "insight analyzed"}],
        **SHORT_DATE_RANGE,
        "funnel_correlation_type": FunnelCorrelationType.EVENT_WITH_PROPERTIES,
        "funnel_correlation_event_names": ["$autocapture"],
    }
    benchmark_filter = Filter(data=funnel_data, team=self.team)
    FunnelCorrelation(benchmark_filter, self.team).run()
def track_correlations_by_properties(self):
    """Run a ``PROPERTIES`` funnel correlation on ``$browser`` inside ``no_materialized_columns()``."""
    funnel_data = {
        "events": [{"id": "user signed up"}, {"id": "insight analyzed"}],
        **SHORT_DATE_RANGE,
        "funnel_correlation_type": FunnelCorrelationType.PROPERTIES,
        "funnel_correlation_names": ["$browser"],
    }
    benchmark_filter = Filter(data=funnel_data, team=self.team)

    # Only the run itself happens inside the context manager; the filter is built before.
    with no_materialized_columns():
        FunnelCorrelation(benchmark_filter, self.team).run()
def test_events_within_conversion_window_for_correlation(self):
    """An event fired outside the 10 minute conversion window is absent from the results."""
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "funnel_window_interval": "10",
            "funnel_window_interval_unit": "minute",
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "events",
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    def record(distinct_id, event, timestamp):
        _create_event(team=self.team, event=event, distinct_id=distinct_id, timestamp=timestamp)

    _create_person(distinct_ids=["user_successful"], team_id=self.team.pk)
    record("user_successful", "user signed up", "2020-01-02T14:00:00Z")
    record("user_successful", "positively_related", "2020-01-02T14:02:00Z")
    record("user_successful", "paid", "2020-01-02T14:06:00Z")

    _create_person(distinct_ids=["user_dropoff"], team_id=self.team.pk)
    record("user_dropoff", "user signed up", "2020-01-02T14:00:00Z")
    # event happened outside conversion window
    record("user_dropoff", "NOT_negatively_related", "2020-01-02T14:15:00Z")

    rows = correlation._run()[0]

    # Odds ratios are floats, so they are compared approximately and removed from the rows.
    actual_odds = [row.pop("odds_ratio") for row in rows]  # type: ignore
    for actual, expected in zip(actual_odds, [4]):
        self.assertAlmostEqual(actual, expected)

    expected_rows = [
        {
            "event": "positively_related",
            "success_count": 1,
            "failure_count": 0,
            # "odds_ratio": 4.0,
            "correlation_type": "success",
        },
    ]
    self.assertEqual(rows, expected_rows)
def test_basic_funnel_correlation_with_properties(self):
    """Correlate funnel conversion with the ``$browser`` person property."""
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "properties",
            "funnel_correlation_names": ["$browser"],
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    def person(distinct_id, browser):
        _create_person(distinct_ids=[distinct_id], team_id=self.team.pk, properties={"$browser": browser})

    def record(distinct_id, event, timestamp):
        _create_event(team=self.team, event=event, distinct_id=distinct_id, timestamp=timestamp)

    for i in range(10):
        person(f"user_{i}", "Positive")
        record(f"user_{i}", "user signed up", "2020-01-02T14:00:00Z")
        record(f"user_{i}", "paid", "2020-01-04T14:00:00Z")

    for i in range(10, 20):
        person(f"user_{i}", "Negative")
        record(f"user_{i}", "user signed up", "2020-01-02T14:00:00Z")
        if i % 2 == 0:
            record(f"user_{i}", "negatively_related", "2020-01-03T14:00:00Z")

    # One Positive with failure
    person("user_fail", "Positive")
    record("user_fail", "user signed up", "2020-01-02T14:00:00Z")

    # One Negative with success
    person("user_succ", "Negative")
    record("user_succ", "user signed up", "2020-01-02T14:00:00Z")
    record("user_succ", "paid", "2020-01-04T14:00:00Z")

    rows = correlation._run()[0]
    actual_odds = [row.pop("odds_ratio") for row in rows]  # type: ignore

    # Success Total = 11, Failure Total = 11
    #
    # Browser::Positive
    # Success: 10
    # Failure: 1

    # Browser::Negative
    # Success: 1
    # Failure: 10
    prior_count = 1
    positive_odds = ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count))
    negative_odds = ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count))

    for actual, expected in zip(actual_odds, [positive_odds, negative_odds]):
        self.assertAlmostEqual(actual, expected)

    expected_rows = [
        {
            "event": "$browser::Positive",
            "success_count": 10,
            "failure_count": 1,
            # "odds_ratio": 121/4,
            "correlation_type": "success",
        },
        {
            "event": "$browser::Negative",
            "success_count": 1,
            "failure_count": 10,
            # "odds_ratio": 4/121,
            "correlation_type": "failure",
        },
    ]
    self.assertEqual(rows, expected_rows)
def test_are_results_insignificant(self):
    """Check ``FunnelCorrelation.are_results_insignificant`` under several thresholds.

    Each scenario overrides the class-level ``MIN_PERSON_PERCENTAGE`` and
    ``MIN_PERSON_COUNT`` and counts how many contingency tables are kept.
    The original threshold values are restored when the test finishes so the
    overrides cannot leak into other tests.
    """
    # Same numbers as the data built in test_discarding_insignificant_events
    contingency_tables = [
        EventContingencyTable(
            event="negatively_related",
            visited=EventStats(success_count=0, failure_count=5),
            success_total=10,
            failure_total=10,
        ),
        EventContingencyTable(
            event="positively_related",
            visited=EventStats(success_count=5, failure_count=0),
            success_total=10,
            failure_total=10,
        ),
        EventContingencyTable(
            event="low_sig_negatively_related",
            visited=EventStats(success_count=0, failure_count=2),
            success_total=10,
            failure_total=10,
        ),
        EventContingencyTable(
            event="low_sig_positively_related",
            visited=EventStats(success_count=1, failure_count=0),
            success_total=10,
            failure_total=10,
        ),
    ]

    # The thresholds live on the class, so assignments below would otherwise
    # persist after this test returns.
    self.addCleanup(setattr, FunnelCorrelation, "MIN_PERSON_PERCENTAGE", FunnelCorrelation.MIN_PERSON_PERCENTAGE)
    self.addCleanup(setattr, FunnelCorrelation, "MIN_PERSON_COUNT", FunnelCorrelation.MIN_PERSON_COUNT)

    # (description, MIN_PERSON_PERCENTAGE, MIN_PERSON_COUNT, number of tables expected to be kept)
    scenarios = [
        ("Discard both low_sig due to %", 0.11, 25, 2),
        ("Discard one low_sig due to %", 0.051, 25, 3),
        ("Discard both due to count", 0.5, 3, 2),
        ("Discard one due to count", 0.5, 2, 3),
        ("Discard everything due to %", 0.5, 100, 0),
        ("Discard everything due to count", 0.5, 6, 0),
    ]

    for description, min_percentage, min_count, expected_kept in scenarios:
        with self.subTest(description):
            FunnelCorrelation.MIN_PERSON_PERCENTAGE = min_percentage
            FunnelCorrelation.MIN_PERSON_COUNT = min_count

            kept = [
                contingency_table
                for contingency_table in contingency_tables
                if not FunnelCorrelation.are_results_insignificant(contingency_table)
            ]
            self.assertEqual(len(kept), expected_kept)
class _FunnelEventsCorrelationActors(ActorBaseQuery):
    """Builds the actors query for an event correlation."""

    _filter: Filter

    def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None:
        """Create the ``FunnelCorrelation`` helper, then initialise the base query."""
        correlation = FunnelCorrelation(filter, team, base_uri=base_uri)
        self._funnel_correlation = correlation
        super().__init__(team, filter)

    @cached_property
    def aggregation_group_type_index(self):
        """Return the filter's ``aggregation_group_type_index``."""
        return self._filter.aggregation_group_type_index

    def actor_query(self, limit_actors: Optional[bool] = True):
        """Build the SQL and parameters selecting actors that performed the target event.

        Args:
            limit_actors: When truthy, ``LIMIT``/``OFFSET`` placeholders are added to the query.

        Returns:
            A ``(query, params)`` tuple.

        Raises:
            ValidationError: If the filter has no ``correlation_person_entity``.
        """
        if not self._filter.correlation_person_entity:
            raise ValidationError("No entity for persons specified")

        assert isinstance(self._filter.correlation_person_entity, Entity)

        actors_cte_sql, actors_cte_params = self._funnel_correlation.get_funnel_actors_cte()

        entity_property_groups = self._filter.correlation_person_entity.property_groups

        # TRICKY: We use "events" as an alias here while the eventquery uses "e" by default
        event_query = FunnelEventQuery(self._filter, self._team.pk)
        event_query.EVENT_TABLE_ALIAS = "events"
        prop_query, prop_params = event_query._get_prop_groups(entity_property_groups)

        if self._filter.correlation_persons_converted is not None:
            conversion_filter = (
                f'AND actors.steps {"=" if self._filter.correlation_persons_converted else "<>"} target_step'
            )
        else:
            conversion_filter = ""

        event_join_query = self._funnel_correlation._get_events_join_query()

        if self._filter.include_recordings:
            recording_event_select_statement = ", any(actors.matching_events) AS matching_events"
        else:
            recording_event_select_statement = ""

        # Pagination placeholders are only present when limiting is requested.
        limit_clause = "LIMIT %(limit)s" if limit_actors else ""
        offset_clause = "OFFSET %(offset)s" if limit_actors else ""

        query = f"""
            WITH
                funnel_actors as ({actors_cte_sql}),
                toDateTime(%(date_to)s) AS date_to,
                toDateTime(%(date_from)s) AS date_from,
                %(target_step)s AS target_step,
                %(funnel_step_names)s as funnel_step_names
            SELECT
                actors.actor_id AS actor_id
                {recording_event_select_statement}
            FROM events AS event
                {event_join_query}
                AND event.event = %(target_event)s
                {conversion_filter}
                {prop_query}
            GROUP BY actor_id
            ORDER BY actor_id
            {limit_clause}
            {offset_clause}
        """

        params = {
            **actors_cte_params,
            **prop_params,
            "target_event": self._filter.correlation_person_entity.id,
            "funnel_step_names": [entity.id for entity in self._filter.events],
            "target_step": len(self._filter.entities),
            "limit": self._filter.correlation_person_limit,
            "offset": self._filter.correlation_person_offset,
        }

        return query, params
def test_no_divide_by_zero_errors(self):
    """Events with a zero success or zero failure count still yield odds ratios."""
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    def person(distinct_id, browser):
        _create_person(distinct_ids=[distinct_id], team_id=self.team.pk, properties={"$browser": browser})

    def record(distinct_id, event, timestamp):
        _create_event(team=self.team, event=event, distinct_id=distinct_id, timestamp=timestamp)

    for i in range(2):
        person(f"user_{i}", "Positive")
        record(f"user_{i}", "user signed up", "2020-01-02T14:00:00Z")
        # failure count for this event is 0
        record(f"user_{i}", "positive", "2020-01-03T14:00:00Z")
        record(f"user_{i}", "paid", "2020-01-04T14:00:00Z")

    for i in range(2, 4):
        person(f"user_{i}", "Negative")
        record(f"user_{i}", "user signed up", "2020-01-02T14:00:00Z")
        if i % 2 == 0:
            # success count for this event is 0
            record(f"user_{i}", "negatively_related", "2020-01-03T14:00:00Z")

    results = correlation._run()
    self.assertFalse(results[1])

    rows = results[0]
    actual_odds = [row.pop("odds_ratio") for row in rows]  # type: ignore
    for actual, expected in zip(actual_odds, [9, 1 / 3]):
        self.assertAlmostEqual(actual, expected)

    expected_rows = [
        {
            "event": "positive",
            "success_count": 2,
            "failure_count": 0,
            # "odds_ratio": 9.0,
            "correlation_type": "success",
        },
        {
            "event": "negatively_related",
            "success_count": 0,
            "failure_count": 1,
            # "odds_ratio": 1 / 3,
            "correlation_type": "failure",
        },
    ]
    self.assertEqual(rows, expected_rows)
class _FunnelPropertyCorrelationActors(ActorBaseQuery):
    """Builds the actors query for a property correlation.

    Funnel actors are joined against a person or group subquery and, when
    aggregating by groups, additionally filtered by the group-typed correlation
    property values.
    """

    _filter: Filter

    def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None:
        """Create the ``FunnelCorrelation`` helper, then initialise the base query."""
        self._funnel_correlation = FunnelCorrelation(filter, team, base_uri=base_uri)
        super().__init__(team, filter)

    @cached_property
    def aggregation_group_type_index(self):
        """Return the filter's ``aggregation_group_type_index``."""
        return self._filter.aggregation_group_type_index

    def actor_query(self, limit_actors: Optional[bool] = True, extra_fields: Optional[List[str]] = None):
        """Build the SQL and parameters selecting the actor ids for this correlation.

        Args:
            limit_actors: When truthy, ``LIMIT``/``OFFSET`` placeholders are added to the query.
            extra_fields: Accepted but not used by this implementation.

        Returns:
            A ``(query, params)`` tuple.

        Raises:
            ValidationError: If the filter has no correlation property values.
        """
        if not self._filter.correlation_property_values:
            raise ValidationError("Property Correlation expects atleast one Property to get persons for")

        funnel_persons_query, funnel_persons_params = self._funnel_correlation.get_funnel_actors_cte()

        # Fall back to an always-true condition: with an empty string here the
        # query would read "WHERE  <group filters>" (or a bare "WHERE"), which is
        # not valid SQL.
        conversion_filter = (
            f'funnel_actors.steps {"=" if self._filter.correlation_persons_converted else "<>"} target_step'
            if self._filter.correlation_persons_converted is not None
            else "1 = 1"
        )

        actor_join_subquery, actor_join_subquery_params = self._get_actor_subquery()
        group_filters, group_filters_params = self._get_group_filters()

        recording_event_select_statement = (
            ", any(funnel_actors.matching_events) AS matching_events" if self._filter.include_recordings else ""
        )

        query = f"""
            WITH
                funnel_actors AS ({funnel_persons_query}),
                %(target_step)s AS target_step
            SELECT
                funnel_actors.actor_id AS actor_id
                {recording_event_select_statement}
            FROM funnel_actors
            {actor_join_subquery}
            WHERE {conversion_filter}
            {group_filters}
            GROUP BY funnel_actors.actor_id
            ORDER BY actor_id
            {"LIMIT %(limit)s" if limit_actors else ""}
            {"OFFSET %(offset)s" if limit_actors else ""}
        """
        params = {
            **funnel_persons_params,
            **actor_join_subquery_params,
            **group_filters_params,
            "target_step": len(self._filter.entities),
            "limit": self._filter.correlation_person_limit,
            "offset": self._filter.correlation_person_offset,
        }

        return query, params

    def _get_actor_subquery(self) -> Tuple[str, Dict[str, Any]]:
        """Return the JOIN fragment (and its params) attaching actor data to ``funnel_actors``."""
        if self.is_aggregating_by_groups:
            actor_join_subquery, actor_join_subquery_params = GroupsJoinQuery(
                self._filter, self._team.pk, join_key="funnel_actors.actor_id"
            ).get_join_query()
        else:
            # The correlation property values are passed as the properties of a
            # synthetic entity handed to the person query.
            person_query, actor_join_subquery_params = ClickhousePersonQuery(
                self._filter,
                self._team.pk,
                entity=Entity(
                    {"id": "person", "type": "events", "properties": self._filter.correlation_property_values}
                ),
            ).get_query()

            actor_join_subquery = f"""
                JOIN ({person_query}) person
                ON person.id = funnel_actors.actor_id
            """

        return actor_join_subquery, actor_join_subquery_params

    def _get_group_filters(self) -> Tuple[str, Dict[str, Any]]:
        """Return SQL conditions (and params) for group-typed correlation properties.

        Returns ``("", {})`` when the query is not aggregating by groups.
        """
        if not self.is_aggregating_by_groups:
            return "", {}

        # The leading empty element makes the joined string start with a space.
        # NOTE(review): each expression presumably carries its own leading boolean
        # operator, since they are appended directly after the WHERE condition - confirm
        # against prop_filter_json_extract.
        conditions: List[str] = [""]
        params: Dict[str, Any] = {}

        for index, prop in enumerate(self._filter.correlation_property_values or []):
            if prop.type != "group":
                continue

            expr, prop_params = prop_filter_json_extract(
                prop,
                index,
                prepend=f"group_type_{prop.group_type_index}",
                prop_var=f"group_properties_{prop.group_type_index}",
                allow_denormalized_props=True,
            )
            conditions.append(expr)
            params.update(prop_params)

        return " ".join(conditions), params
def test_basic_funnel_correlation_with_events(self):
    """Correlate conversion with events, then repeat with one event name excluded."""
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "events",
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    def record(user_index, event, timestamp):
        _create_event(team=self.team, event=event, distinct_id=f"user_{user_index}", timestamp=timestamp)

    def assert_people_counts(active_filter):
        # Same four lookups are made before and after excluding an event name.
        self.assertEqual(len(self._get_people_for_event(active_filter, "positively_related")), 5)
        self.assertEqual(len(self._get_people_for_event(active_filter, "positively_related", success=False)), 0)
        self.assertEqual(len(self._get_people_for_event(active_filter, "negatively_related", success=False)), 5)
        self.assertEqual(len(self._get_people_for_event(active_filter, "negatively_related")), 0)

    for i in range(10):
        _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
        record(i, "user signed up", "2020-01-02T14:00:00Z")
        if i % 2 == 0:
            record(i, "positively_related", "2020-01-03T14:00:00Z")
        record(i, "paid", "2020-01-04T14:00:00Z")

    for i in range(10, 20):
        _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
        record(i, "user signed up", "2020-01-02T14:00:00Z")
        if i % 2 == 0:
            record(i, "negatively_related", "2020-01-03T14:00:00Z")

    rows = correlation._run()[0]
    actual_odds = [row.pop("odds_ratio") for row in rows]  # type: ignore
    for actual, expected in zip(actual_odds, [11, 1 / 11]):
        self.assertAlmostEqual(actual, expected)

    negatively_related_row = {
        "event": "negatively_related",
        "success_count": 0,
        "failure_count": 5,
        # "odds_ratio": 1 / 11,
        "correlation_type": "failure",
    }
    self.assertEqual(
        rows,
        [
            {
                "event": "positively_related",
                "success_count": 5,
                "failure_count": 0,
                # "odds_ratio": 11.0,
                "correlation_type": "success",
            },
            negatively_related_row,
        ],
    )

    assert_people_counts(correlation_filter)

    # Now exclude positively_related
    correlation_filter = correlation_filter.with_data(
        {"funnel_correlation_exclude_event_names": ["positively_related"]}
    )
    rows = FunnelCorrelation(correlation_filter, self.team)._run()[0]

    remaining_odds = rows[0].pop("odds_ratio")  # type: ignore
    self.assertAlmostEqual(remaining_odds, 1 / 11)
    self.assertEqual(rows, [negatively_related_row])

    # Getting specific people isn't affected by exclude_events
    assert_people_counts(correlation_filter)
def test_correlation_with_multiple_properties(self):
    """Correlate conversion with two named person properties, then ``$all``, then ``$all`` minus one."""
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "properties",
            "funnel_correlation_names": ["$browser", "$nice"],
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    def add_user(distinct_id, properties, converts):
        # Person first, then the sign-up, then (optionally) the conversion event.
        _create_person(distinct_ids=[distinct_id], team_id=self.team.pk, properties=properties)
        _create_event(
            team=self.team, event="user signed up", distinct_id=distinct_id, timestamp="2020-01-02T14:00:00Z",
        )
        if converts:
            _create_event(team=self.team, event="paid", distinct_id=distinct_id, timestamp="2020-01-04T14:00:00Z")

    def pop_odds_ratios(rows):
        return [row.pop("odds_ratio") for row in rows]  # type: ignore

    def assert_odds(actual_odds, expected_odds):
        for actual, expected in zip(actual_odds, expected_odds):
            self.assertAlmostEqual(actual, expected)

    # 5 successful people with both properties
    for i in range(5):
        add_user(f"user_{i}", {"$browser": "Positive", "$nice": "very"}, converts=True)

    # 10 successful people with some different properties
    for i in range(5, 15):
        add_user(f"user_{i}", {"$browser": "Positive", "$nice": "not"}, converts=True)

    # 5 Unsuccessful people with some common properties
    for i in range(15, 20):
        add_user(f"user_{i}", {"$browser": "Negative", "$nice": "smh"}, converts=False)

    # One Positive with failure, no $nice property
    add_user("user_fail", {"$browser": "Positive"}, converts=False)

    # One Negative with success, no $nice property
    add_user("user_succ", {"$browser": "Negative"}, converts=True)

    result = correlation._run()[0]

    # Success Total = 5 + 10 + 1 = 16
    # Failure Total = 5 + 1 = 6
    # Add 1 for priors
    odds_ratios = pop_odds_ratios(result)
    expected_odds_ratios = [
        (16 / 2) * ((7 - 1) / (17 - 15)),
        (11 / 1) * ((7 - 0) / (17 - 10)),
        (6 / 1) * ((7 - 0) / (17 - 5)),
        (1 / 6) * ((7 - 5) / (17 - 0)),
        (2 / 6) * ((7 - 5) / (17 - 1)),
        (2 / 2) * ((7 - 1) / (17 - 1)),
    ]
    # (success + 1) / (failure + 1)
    assert_odds(odds_ratios, expected_odds_ratios)

    expected_result = [
        {
            "event": "$browser::Positive",
            "success_count": 15,
            "failure_count": 1,
            # "odds_ratio": 24,
            "correlation_type": "success",
        },
        {
            "event": "$nice::not",
            "success_count": 10,
            "failure_count": 0,
            # "odds_ratio": 11,
            "correlation_type": "success",
        },
        {
            "event": "$nice::very",
            "success_count": 5,
            "failure_count": 0,
            # "odds_ratio": 3.5,
            "correlation_type": "success",
        },
        {
            "event": "$nice::smh",
            "success_count": 0,
            "failure_count": 5,
            # "odds_ratio": 0.0196078431372549,
            "correlation_type": "failure",
        },
        {
            "event": "$browser::Negative",
            "success_count": 1,
            "failure_count": 5,
            # "odds_ratio": 0.041666666666666664,
            "correlation_type": "failure",
        },
        {
            "event": "$nice::",
            "success_count": 1,
            "failure_count": 1,
            # "odds_ratio": 0.375,
            "correlation_type": "failure",
        },
    ]
    self.assertEqual(result, expected_result)

    # _run property correlation with filter on all properties
    correlation_filter = correlation_filter.with_data({"funnel_correlation_names": ["$all"]})
    new_result = FunnelCorrelation(correlation_filter, self.team)._run()[0]
    odds_ratios = pop_odds_ratios(new_result)

    # When querying all properties, we don't consider properties that don't exist for part of the data
    # since users aren't explicitly asking for that property. Thus,
    # We discard $nice:: because it's an empty result set
    assert_odds(odds_ratios, expected_odds_ratios[:-1])
    self.assertEqual(new_result, expected_result[:-1])

    # search for $all but exclude $browser
    correlation_filter = correlation_filter.with_data({"funnel_correlation_exclude_names": ["$browser"]})
    new_result = FunnelCorrelation(correlation_filter, self.team)._run()[0]
    odds_ratios = pop_odds_ratios(new_result)

    # choosing the $nice property values
    assert_odds(odds_ratios, expected_odds_ratios[1:4])
    self.assertEqual(new_result, expected_result[1:4])
def test_funnel_correlation_with_event_properties_autocapture(self):
    """``$autocapture`` event-property correlation returns elements-chain rows."""
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "event_with_properties",
            "funnel_correlation_event_names": ["$autocapture"],
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    def record(distinct_id, event, timestamp):
        _create_event(team=self.team, event=event, distinct_id=distinct_id, timestamp=timestamp)

    def autocapture(distinct_id, element, properties):
        _create_event(
            team=self.team,
            event="$autocapture",
            distinct_id=distinct_id,
            elements=[element],
            timestamp="2020-01-03T14:00:00Z",
            properties=properties,
        )

    # Need a minimum of 3 hits to get a correlation result
    for i in range(6):
        user = f"user_{i}"
        _create_person(distinct_ids=[user], team_id=self.team.pk)
        record(user, "user signed up", "2020-01-02T14:00:00Z")
        autocapture(
            user,
            Element(nth_of_type=1, nth_child=0, tag_name="a", href="/movie"),
            {"signup_source": "email", "$event_type": "click"},
        )
        # Test two different types of autocapture elements, with different counts, so we can accurately test results
        if i % 2 == 0:
            autocapture(
                user,
                Element(nth_of_type=1, nth_child=0, tag_name="button", text="Pay $10"),
                {"signup_source": "facebook", "$event_type": "submit"},
            )
        record(user, "paid", "2020-01-04T14:00:00Z")

    # Atleast one person that fails, to ensure we get results
    _create_person(distinct_ids=["user_fail"], team_id=self.team.pk)
    record("user_fail", "user signed up", "2020-01-02T14:00:00Z")

    rows = correlation._run()[0]

    # $autocapture results only return elements chain
    expected_rows = [
        {
            "event": '$autocapture::elements_chain::click__~~__a:href="/movie"nth-child="0"nth-of-type="1"',
            "success_count": 6,
            "failure_count": 0,
            "odds_ratio": 14.0,
            "correlation_type": "success",
        },
        {
            "event": '$autocapture::elements_chain::submit__~~__button:nth-child="0"nth-of-type="1"text="Pay $10"',
            "success_count": 3,
            "failure_count": 0,
            "odds_ratio": 2.0,
            "correlation_type": "success",
        },
    ]
    self.assertEqual(rows, expected_rows)

    # (property filter handed to _get_people_for_event, expected number of people)
    people_lookups = (
        ({"signup_source": "facebook"}, 3),
        ({"$event_type": "click"}, 6),
        (
            [
                {"key": "tag_name", "operator": "exact", "type": "element", "value": "button"},
                {"key": "text", "operator": "exact", "type": "element", "value": "Pay $10"},
            ],
            3,
        ),
        (
            [
                {"key": "tag_name", "operator": "exact", "type": "element", "value": "a"},
                {"key": "href", "operator": "exact", "type": "element", "value": "/movie"},
            ],
            6,
        ),
    )
    for property_filter, expected_people in people_lookups:
        people = self._get_people_for_event(correlation_filter, "$autocapture", property_filter)
        self.assertEqual(len(people), expected_people)
def test_discarding_insignificant_events(self):
    """Events seen by too few people are dropped from the correlation results.

    The class-level thresholds on ``FunnelCorrelation`` are overridden for the
    run and restored afterwards so the overrides cannot leak into other tests.
    """
    filters = {
        "events": [
            {"id": "user signed up", "type": "events", "order": 0},
            {"id": "paid", "type": "events", "order": 1},
        ],
        "insight": INSIGHT_FUNNELS,
        "date_from": "2020-01-01",
        "date_to": "2020-01-14",
        "funnel_correlation_type": "events",
    }

    filter = Filter(data=filters)
    correlation = FunnelCorrelation(filter, self.team)

    for i in range(10):
        _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
        _create_event(
            team=self.team, event="user signed up", distinct_id=f"user_{i}", timestamp="2020-01-02T14:00:00Z",
        )
        if i % 2 == 0:
            _create_event(
                team=self.team,
                event="positively_related",
                distinct_id=f"user_{i}",
                timestamp="2020-01-03T14:00:00Z",
            )
        if i % 10 == 0:
            _create_event(
                team=self.team,
                event="low_sig_positively_related",
                distinct_id=f"user_{i}",
                timestamp="2020-01-03T14:20:00Z",
            )
        _create_event(
            team=self.team, event="paid", distinct_id=f"user_{i}", timestamp="2020-01-04T14:00:00Z",
        )

    for i in range(10, 20):
        _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
        _create_event(
            team=self.team, event="user signed up", distinct_id=f"user_{i}", timestamp="2020-01-02T14:00:00Z",
        )
        if i % 2 == 0:
            _create_event(
                team=self.team,
                event="negatively_related",
                distinct_id=f"user_{i}",
                timestamp="2020-01-03T14:00:00Z",
            )
        if i % 5 == 0:
            _create_event(
                team=self.team,
                event="low_sig_negatively_related",
                distinct_id=f"user_{i}",
                timestamp="2020-01-03T14:00:00Z",
            )

    # Total 10 positive, 10 negative
    # low sig count = 1 and 2, high sig count >= 5
    # Thus, to discard the low sig count, % needs to be >= 10%, or count >= 2

    # The thresholds are class attributes: register their restoration before
    # overriding them, so a failure below still puts the original values back.
    self.addCleanup(setattr, FunnelCorrelation, "MIN_PERSON_PERCENTAGE", FunnelCorrelation.MIN_PERSON_PERCENTAGE)
    self.addCleanup(setattr, FunnelCorrelation, "MIN_PERSON_COUNT", FunnelCorrelation.MIN_PERSON_COUNT)

    # Discard both due to %
    FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.11
    FunnelCorrelation.MIN_PERSON_COUNT = 25
    result = correlation._run()[0]
    self.assertEqual(len(result), 2)
def test_funnel_correlation_with_event_properties(self):
    """Check event-with-properties correlation for two named events.

    Users 0-9 convert and users 10-19 do not.  Every even-numbered user
    also fires a correlated event carrying properties; the test pins the
    returned property/value rows, their odds ratios, and the number of
    people returned for individual property values.
    """
    funnel_steps = [
        {"id": "user signed up", "type": "events", "order": 0},
        {"id": "paid", "type": "events", "order": 1},
    ]
    correlation_filter = Filter(
        data={
            "events": funnel_steps,
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "event_with_properties",
            "funnel_correlation_event_names": ["positively_related", "negatively_related"],
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    # Converting users.
    for i in range(10):
        distinct_id = f"user_{i}"
        _create_person(distinct_ids=[distinct_id], team_id=self.team.pk)
        _create_event(
            team=self.team,
            event="user signed up",
            distinct_id=distinct_id,
            timestamp="2020-01-02T14:00:00Z",
        )
        if i % 2 == 0:
            # "email" is attached for i = 2 and 6 only; the expected results
            # below do not contain it.
            signup_source = "facebook" if i % 4 == 0 else "email"
            _create_event(
                team=self.team,
                event="positively_related",
                distinct_id=distinct_id,
                timestamp="2020-01-03T14:00:00Z",
                properties={"signup_source": signup_source, "blah": "value_bleh"},
            )
        _create_event(
            team=self.team,
            event="paid",
            distinct_id=distinct_id,
            timestamp="2020-01-04T14:00:00Z",
        )

    # Non-converting users.
    for i in range(10, 20):
        distinct_id = f"user_{i}"
        _create_person(distinct_ids=[distinct_id], team_id=self.team.pk)
        _create_event(
            team=self.team,
            event="user signed up",
            distinct_id=distinct_id,
            timestamp="2020-01-02T14:00:00Z",
        )
        if i % 2 == 0:
            # "shazam" is attached for i = 12 and 18 only; the expected
            # results below do not contain it.
            signup_source = "shazam" if i % 6 == 0 else "email"
            _create_event(
                team=self.team,
                event="negatively_related",
                distinct_id=distinct_id,
                timestamp="2020-01-03T14:00:00Z",
                properties={"signup_source": signup_source},
            )

    result = correlation._run()[0]

    # Odds ratios are floats: strip them out and compare approximately, then
    # compare the remaining fields exactly.
    observed_odds = [row.pop("odds_ratio") for row in result]  # type: ignore
    for observed, expected in zip(observed_odds, (11, 5.5, 2 / 11)):
        self.assertAlmostEqual(observed, expected)

    expected_rows = [
        {
            "event": "positively_related::blah::value_bleh",
            "success_count": 5,
            "failure_count": 0,
            # "odds_ratio": 11.0,
            "correlation_type": "success",
        },
        {
            "event": "positively_related::signup_source::facebook",
            "success_count": 3,
            "failure_count": 0,
            # "odds_ratio": 5.5,
            "correlation_type": "success",
        },
        {
            "event": "negatively_related::signup_source::email",
            "success_count": 0,
            "failure_count": 3,
            # "odds_ratio": 0.18181818181818182,
            "correlation_type": "failure",
        },
    ]
    self.assertEqual(result, expected_rows)

    # (event, property filter, extra positional args, expected people count)
    people_checks = (
        ("positively_related", {"blah": "value_bleh"}, (), 5),
        ("positively_related", {"signup_source": "facebook"}, (), 3),
        ("positively_related", {"signup_source": "facebook"}, (False,), 0),
        ("negatively_related", {"signup_source": "email"}, (False,), 3),
    )
    for event_name, property_filter, extra_args, expected_people in people_checks:
        people = self._get_people_for_event(correlation_filter, event_name, property_filter, *extra_args)
        self.assertEqual(len(people), expected_people)
def test_funnel_correlation_with_event_properties_exclusions(self):
    """Check that excluded property names are left out of correlation results.

    The filter lists "signup_source" under
    "funnel_correlation_event_exclude_property_names"; the expected result
    contains only the "blah" property, while people can still be looked up
    by the excluded property.
    """
    correlation_filter = Filter(
        data={
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
                {"id": "paid", "type": "events", "order": 1},
            ],
            "insight": INSIGHT_FUNNELS,
            "date_from": "2020-01-01",
            "date_to": "2020-01-14",
            "funnel_correlation_type": "event_with_properties",
            "funnel_correlation_event_names": ["positively_related"],
            "funnel_correlation_event_exclude_property_names": ["signup_source"],
        }
    )
    correlation = FunnelCorrelation(correlation_filter, self.team)

    # Need more than 2 events to get a correlation
    for distinct_id in [f"user_{n}" for n in range(3)]:
        _create_person(distinct_ids=[distinct_id], team_id=self.team.pk)
        _create_event(
            team=self.team,
            event="user signed up",
            distinct_id=distinct_id,
            timestamp="2020-01-02T14:00:00Z",
        )
        _create_event(
            team=self.team,
            event="positively_related",
            distinct_id=distinct_id,
            timestamp="2020-01-03T14:00:00Z",
            properties={"signup_source": "facebook", "blah": "value_bleh"},
        )
        _create_event(
            team=self.team,
            event="paid",
            distinct_id=distinct_id,
            timestamp="2020-01-04T14:00:00Z",
        )

    # At least one person that fails, to ensure we get results
    failing_id = "user_fail"
    _create_person(distinct_ids=[failing_id], team_id=self.team.pk)
    _create_event(
        team=self.team,
        event="user signed up",
        distinct_id=failing_id,
        timestamp="2020-01-02T14:00:00Z",
    )

    result = correlation._run()[0]

    expected_rows = [
        {
            "event": "positively_related::blah::value_bleh",
            "success_count": 3,
            "failure_count": 0,
            "odds_ratio": 8,
            "correlation_type": "success",
        },
        # missing signup_source, as expected
    ]
    self.assertEqual(result, expected_rows)

    people_by_blah = self._get_people_for_event(correlation_filter, "positively_related", {"blah": "value_bleh"})
    self.assertEqual(len(people_by_blah), 3)

    # If you search for persons with a specific property, even if excluded earlier, you should get them
    people_by_source = self._get_people_for_event(
        correlation_filter, "positively_related", {"signup_source": "facebook"}
    )
    self.assertEqual(len(people_by_source), 3)