def _get_sequence_filter(
    self, prop: Property, idx: int
) -> Tuple[List[str], List[str], List[str], Dict[str, Any]]:
    """Build the SQL column fragments for an "event followed by event" filter.

    Args:
        prop: Property describing the first event (``event_type``/``key``),
            the follow-up event (``seq_event_type``/``seq_event``), the
            look-back window (``time_value``/``time_interval``), the maximum
            gap between both events (``seq_time_value``/``seq_time_interval``)
            and whether the match is negated (``negation``).
        idx: Index of this filter; used to namespace column aliases and
            entity parameters.

    Returns:
        A 4-tuple ``(step_cols, condition_cols, aggregate_cols, params)``:
        per-row step/timestamp columns, the columns carried into the
        window-function stage, the final aggregated boolean column, and the
        merged query parameters of both entity sub-queries.

    Raises:
        Whatever the validation helpers raise for invalid entities,
        intervals or non-positive integers.
    """
    first_event = validate_entity((prop.event_type, prop.key))
    first_query, first_params = self._get_entity(
        first_event, f"event_sequence_{self._cohort_pk}", idx
    )

    second_event = validate_entity((prop.seq_event_type, prop.seq_event))
    second_query, second_params = self._get_entity(
        second_event, f"seq_event_sequence_{self._cohort_pk}", idx
    )

    window_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
    window_unit = validate_interval(prop.time_interval)
    gap_value = parse_and_validate_positive_integer(prop.seq_time_value, "time_value")
    gap_unit = validate_interval(prop.seq_time_interval)
    self._check_earliest_date((window_value, window_unit))

    prefix = f"event_{idx}"
    latest_first = f"{prefix}_latest_0"
    latest_second = f"{prefix}_latest_1"

    # When both steps are the same event the window frame stops one row
    # before the current one -- presumably so a row cannot match itself.
    frame_end = 1 if first_event == second_event else 0

    # Final boolean: some first-event row has a follow-up timestamp that is
    # strictly later and at most the configured gap afterwards.
    negation = "NOT" if prop.negation else ""
    alias = f"{self.SEQUENCE_FIELD_ALIAS}_{self.sequence_filters_lookup[str(prop.to_dict())]}"
    aggregate_cols = [
        f"{negation} max(if({first_query} AND {latest_first} < {latest_second} AND {latest_second} <= {latest_first} + INTERVAL {gap_value} {gap_unit}, 2, 1)) = 2 AS {alias}"
    ]

    # Window stage: keep the first event's timestamp as is and replace the
    # second one by the minimum over the window frame defined below.
    condition_cols = [
        latest_first,
        f"min({latest_second}) over (PARTITION by person_id ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND {frame_end} PRECEDING) {latest_second}",
    ]

    # Row stage: for each step, a 0/1 flag for "matching event inside the
    # look-back window" plus the timestamp of that row (null otherwise).
    # Both steps use the same look-back window.
    in_window = f"timestamp > now() - INTERVAL {window_value} {window_unit}"
    step_cols = []
    for step, query in enumerate((first_query, second_query)):
        step_cols.append(f"if({query} AND {in_window}, 1, 0) AS {prefix}_step_{step}")
        step_cols.append(
            f"if({prefix}_step_{step} = 1, timestamp, null) AS {prefix}_latest_{step}"
        )

    return step_cols, condition_cols, aggregate_cols, {**first_params, **second_params}
def get_restarted_performing_event(
    self, prop: Property, prepend: str, idx: int
) -> Tuple[str, Dict[str, Any]]:
    """Register a boolean column for "performed, stopped, then restarted".

    The column is true when the event occurred at or before the older
    cutoff (``time_value``/``time_interval`` ago), did not occur between the
    older and the more recent cutoff (``seq_time_value``/``seq_time_interval``
    ago), and occurred again after the more recent cutoff.

    Args:
        prop: Property carrying the event and both time ranges.
        prepend: Prefix used to namespace the column and parameter names.
        idx: Index used to namespace the column and parameter names.

    Returns:
        ``(column_name, params)`` where ``params`` holds both date values and
        the entity query parameters.

    Side effects:
        Appends the column expression to ``self._fields`` and sets
        ``self._restrict_event_query_by_time`` to ``False``.
    """
    column_name = f"restarted_event_condition_{prepend}_{idx}"
    entity_query, entity_params = self._get_entity((prop.event_type, prop.key), prepend, idx)

    date_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
    date_param = f"{prepend}_date_{idx}"
    date_interval = validate_interval(prop.time_interval)

    seq_date_value = parse_and_validate_positive_integer(prop.seq_time_value, "time_value")
    seq_date_param = f"{prepend}_seq_date_{idx}"
    seq_date_interval = validate_interval(prop.seq_time_interval)

    validate_seq_date_more_recent_than_date(
        (seq_date_value, seq_date_interval), (date_value, date_interval)
    )

    # The initial period below has no lower bound, which is presumably why
    # the time restriction on the event query is switched off here.
    self._restrict_event_query_by_time = False

    older_cutoff = f"now() - INTERVAL %({date_param})s {date_interval}"
    recent_cutoff = f"now() - INTERVAL %({seq_date_param})s {seq_date_interval}"

    # 1. the event was fired some time before the older cutoff ...
    fired_initially = f"countIf(timestamp <= {older_cutoff} AND {entity_query})"
    # 2. ... was not fired between the two cutoffs ...
    fired_during_pause = (
        f"countIf(timestamp > {older_cutoff} AND timestamp <= {recent_cutoff} AND {entity_query})"
    )
    # 3. ... and was fired again after the recent cutoff.
    fired_again = f"countIf(timestamp > {recent_cutoff} AND timestamp <= now() AND {entity_query})"

    self._fields.append(
        f"({fired_initially} > 0 AND {fired_during_pause} = 0 AND {fired_again} > 0) as {column_name}"
    )

    query_params = {
        date_param: date_value,
        seq_date_param: seq_date_value,
        **entity_params,
    }
    return column_name, query_params
def get_performed_event_regularly(
    self, prop: Property, prepend: str, idx: int
) -> Tuple[str, Dict[str, Any]]:
    """Register a boolean column for "performed the event regularly".

    The look-back range is split into ``total_periods`` consecutive periods
    of ``time_value`` ``time_interval`` each. A period counts when the number
    of matching events in it satisfies ``operator`` against
    ``operator_value``; the column is true when at least ``min_periods``
    periods count.

    Args:
        prop: Property carrying the event, period length, operator and
            period counts.
        prepend: Prefix used to namespace the column and parameter names.
        idx: Index used to namespace the column and parameter names.

    Returns:
        ``(column_name, params)`` where ``params`` merges the entity query
        parameters with the time value, operator value and minimum period
        count.

    Raises:
        ValueError: If ``min_periods`` is greater than ``total_periods``.

    Side effects:
        Appends the column expression to ``self._fields``.
    """
    entity_query, entity_params = self._get_entity((prop.event_type, prop.key), prepend, idx)
    column_name = f"performed_event_regularly_{prepend}_{idx}"

    date_interval = validate_interval(prop.time_interval)

    time_value_param = f"{prepend}_time_value_{idx}"
    time_value = parse_and_validate_positive_integer(prop.time_value, "time_value")

    operator_value_param = f"{prepend}_operator_value_{idx}"
    operator_value = parse_and_validate_positive_integer(prop.operator_value, "operator_value")

    min_periods_param = f"{prepend}_min_periods_{idx}"
    min_period_count = parse_and_validate_positive_integer(prop.min_periods, "min_periods")
    total_period_count = parse_and_validate_positive_integer(prop.total_periods, "total_periods")

    if min_period_count > total_period_count:
        raise ValueError(
            f"min_periods ({min_period_count}) cannot be greater than total_periods ({total_period_count})"
        )

    own_params = {
        time_value_param: time_value,
        operator_value_param: operator_value,
        min_periods_param: min_period_count,
    }

    period_clauses: List[str] = []
    if total_period_count:
        # One clause per period: 1 if the event was performed the expected
        # number of times inside that period, otherwise 0. Period ``n``
        # spans from ``n + 1`` down to ``n`` period lengths before now.
        period_clauses = [
            f"if(countIf({entity_query} and timestamp <= now() - INTERVAL %({time_value_param})s * {n} {date_interval} and timestamp > now() - INTERVAL %({time_value_param})s * ({n} + 1) {date_interval}) {get_count_operator(prop.operator)} %({operator_value_param})s, 1, 0)"
            for n in range(total_period_count)
        ]

    # The oldest data needed lies total_periods * time_value intervals back.
    self._check_earliest_date((total_period_count * time_value, date_interval))

    # Summing the 0/1 clauses yields the number of periods that counted.
    self._fields.append(
        f"{'+'.join(period_clauses)}>= %({min_periods_param})s as {column_name}"
    )

    return column_name, {**entity_params, **own_params}
def get_stopped_performing_event(
    self, prop: Property, prepend: str, idx: int
) -> Tuple[str, Dict[str, Any]]:
    """Register a boolean column for "was performing the event, then stopped".

    The column is true when the event occurred between the older cutoff
    (``time_value``/``time_interval`` ago) and the more recent cutoff
    (``seq_time_value``/``seq_time_interval`` ago), and did not occur after
    the more recent cutoff.

    Args:
        prop: Property carrying the event and both time ranges.
        prepend: Prefix used to namespace the column and parameter names.
        idx: Index used to namespace the column and parameter names.

    Returns:
        ``(column_name, params)`` where ``params`` holds both date values and
        the entity query parameters.

    Side effects:
        Appends the column expression to ``self._fields`` and passes the
        older range to ``self._check_earliest_date``.
    """
    column_name = f"stopped_event_condition_{prepend}_{idx}"
    entity_query, entity_params = self._get_entity((prop.event_type, prop.key), prepend, idx)

    date_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
    date_param = f"{prepend}_date_{idx}"
    date_interval = validate_interval(prop.time_interval)

    seq_date_value = parse_and_validate_positive_integer(prop.seq_time_value, "time_value")
    seq_date_param = f"{prepend}_seq_date_{idx}"
    seq_date_interval = validate_interval(prop.seq_time_interval)

    validate_seq_date_more_recent_than_date(
        (seq_date_value, seq_date_interval), (date_value, date_interval)
    )
    self._check_earliest_date((date_value, date_interval))

    older_cutoff = f"now() - INTERVAL %({date_param})s {date_interval}"
    recent_cutoff = f"now() - INTERVAL %({seq_date_param})s {seq_date_interval}"

    # The user was doing the event between the two cutoffs ...
    was_active = (
        f"countIf(timestamp > {older_cutoff} AND timestamp <= {recent_cutoff} AND {entity_query})"
    )
    # ... and has not done it since the recent cutoff.
    still_active = f"countIf(timestamp > {recent_cutoff} AND timestamp <= now() AND {entity_query})"

    self._fields.append(f"({was_active} > 0 AND {still_active} = 0) as {column_name}")

    query_params = {
        date_param: date_value,
        seq_date_param: seq_date_value,
        **entity_params,
    }
    return column_name, query_params
def get_performed_event_first_time(
    self, prop: Property, prepend: str, idx: int
) -> Tuple[str, Dict[str, Any]]:
    """Register a boolean column for "performed the event for the first time".

    The column is true when the earliest matching event timestamp lies
    within the last ``time_value`` ``time_interval`` and before now.

    Args:
        prop: Property carrying the event and the time range.
        prepend: Prefix used to namespace the column and parameter names.
        idx: Index used to namespace the column and parameter names.

    Returns:
        ``(column_name, params)`` where ``params`` holds the date value and
        the entity query parameters.

    Side effects:
        Appends the column expression to ``self._fields`` and sets
        ``self._restrict_event_query_by_time`` to ``False``.
    """
    entity_query, entity_params = self._get_entity((prop.event_type, prop.key), prepend, idx)
    column_name = f"first_time_condition_{prepend}_{idx}"

    date_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
    date_param = f"{prepend}_date_{idx}"
    date_interval = validate_interval(prop.time_interval)

    # Finding the very first occurrence presumably needs the full event
    # history, hence the time restriction is switched off.
    self._restrict_event_query_by_time = False

    first_seen = f"minIf(timestamp, {entity_query})"
    self._fields.append(
        f"{first_seen} >= now() - INTERVAL %({date_param})s {date_interval} AND {first_seen} < now() as {column_name}"
    )

    return column_name, {date_param: date_value, **entity_params}