예제 #1
0
    def _get_sequence_filter(
        self, prop: Property, idx: int
    ) -> Tuple[List[str], List[str], List[str], Dict[str, Any]]:
        """Build the SQL column fragments for one "event A followed by event B" filter.

        Args:
            prop: Property describing the first event (``event_type``/``key``),
                the follow-up event (``seq_event_type``/``seq_event``), the
                lookback window (``time_value``/``time_interval``) and the
                maximum gap allowed between the two events
                (``seq_time_value``/``seq_time_interval``).
            idx: Index of this filter; it namespaces the generated column
                aliases (``event_{idx}_...``) and is forwarded to
                ``_get_entity``.

        Returns:
            A 4-tuple ``(step_cols, condition_cols, aggregate_cols, params)``:

            * ``step_cols`` - per-row flag and timestamp columns for both events.
            * ``condition_cols`` - the first event's timestamp column plus a
              window expression over the follow-up event's timestamps.
            * ``aggregate_cols`` - the single aliased boolean aggregate that
              decides whether the sequence happened.
            * ``params`` - query parameters of both entities, merged.

        Raises:
            Whatever the validation helpers raise for invalid input
            (presumably ``ValueError`` - confirm against the helpers).
        """
        first_event = validate_entity((prop.event_type, prop.key))
        entity_query, entity_params = self._get_entity(
            first_event, f"event_sequence_{self._cohort_pk}", idx
        )
        follow_up_event = validate_entity((prop.seq_event_type, prop.seq_event))
        seq_entity_query, seq_entity_params = self._get_entity(
            follow_up_event, f"seq_event_sequence_{self._cohort_pk}", idx
        )

        time_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
        time_interval = validate_interval(prop.time_interval)
        # Fix: label the value by the field it actually came from, so that a
        # validation failure points at seq_time_value rather than time_value.
        seq_date_value = parse_and_validate_positive_integer(
            prop.seq_time_value, "seq_time_value"
        )
        seq_date_interval = validate_interval(prop.seq_time_interval)
        self._check_earliest_date((time_value, time_interval))

        event_prepend = f"event_{idx}"
        latest_0 = f"{event_prepend}_latest_0"
        latest_1 = f"{event_prepend}_latest_1"

        # When both steps are the same event, the window frame below stops one
        # row before the current one so a row is never paired with itself.
        duplicate_event = 1 if first_event == follow_up_event else 0

        negation = "NOT" if prop.negation else ""
        alias = f"{self.SEQUENCE_FIELD_ALIAS}_{self.sequence_filters_lookup[str(prop.to_dict())]}"
        # Yields 2 for a row where the first event is followed by the second
        # within the allowed gap, 1 otherwise; max(...) = 2 is therefore true
        # when at least one such row exists.
        aggregate_condition = (
            f"{negation} max(if({entity_query} AND {latest_0} < {latest_1}"
            f" AND {latest_1} <= {latest_0} + INTERVAL {seq_date_value} {seq_date_interval}"
            f", 2, 1)) = 2 AS {alias}"
        )
        aggregate_cols = [aggregate_condition]

        # Rows are ordered newest first, so "preceding" rows are later in time:
        # the window picks the earliest follow-up timestamp at/after this row.
        timestamp_condition = (
            f"min({latest_1}) over (PARTITION by person_id ORDER BY timestamp DESC "
            f"ROWS BETWEEN UNBOUNDED PRECEDING AND {duplicate_event} PRECEDING) {latest_1}"
        )
        condition_cols = [latest_0, timestamp_condition]

        # Both steps are restricted to the same lookback window.
        within_window = f"timestamp > now() - INTERVAL {time_value} {time_interval}"
        step_cols = [
            f"if({entity_query} AND {within_window}, 1, 0) AS {event_prepend}_step_0",
            f"if({event_prepend}_step_0 = 1, timestamp, null) AS {latest_0}",
            f"if({seq_entity_query} AND {within_window}, 1, 0) AS {event_prepend}_step_1",
            f"if({event_prepend}_step_1 = 1, timestamp, null) AS {latest_1}",
        ]

        return step_cols, condition_cols, aggregate_cols, {
            **entity_params,
            **seq_entity_params,
        }
예제 #2
0
    def get_restarted_performing_event(
        self, prop: Property, prepend: str, idx: int
    ) -> Tuple[str, Dict[str, Any]]:
        """Register a field that is true when an event resumed after a pause.

        The generated condition requires all three of:

        * at least one matching event at or before ``now() - time_value``;
        * no matching event between ``now() - time_value`` and
          ``now() - seq_time_value``;
        * at least one matching event after ``now() - seq_time_value``.

        The expression is appended to ``self._fields``.

        Args:
            prop: Property holding the event (``event_type``/``key``) and both
                windows (``time_value``/``time_interval`` and
                ``seq_time_value``/``seq_time_interval``).
            prepend: Prefix used to namespace the column and parameter names.
            idx: Index used to namespace the column and parameter names.

        Returns:
            ``(column_name, params)`` - the alias of the appended field and the
            query parameters it references (both window values plus the
            entity parameters).

        Raises:
            Whatever the validation helpers raise for invalid input
            (presumably ``ValueError`` - confirm against the helpers).
        """
        column_name = f"restarted_event_condition_{prepend}_{idx}"
        entity_query, entity_params = self._get_entity(
            (prop.event_type, prop.key), prepend, idx
        )

        date_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
        date_param = f"{prepend}_date_{idx}"
        date_interval = validate_interval(prop.time_interval)

        # Fix: label the value by the field it actually came from, so that a
        # validation failure points at seq_time_value rather than time_value.
        seq_date_value = parse_and_validate_positive_integer(
            prop.seq_time_value, "seq_time_value"
        )
        seq_date_param = f"{prepend}_seq_date_{idx}"
        seq_date_interval = validate_interval(prop.seq_time_interval)

        validate_seq_date_more_recent_than_date(
            (seq_date_value, seq_date_interval), (date_value, date_interval)
        )

        # The initial period below has no lower time bound.
        # NOTE(review): presumably this flag stops the outer event query from
        # being limited to a recent time range - confirm where it is read.
        self._restrict_event_query_by_time = False

        older_bound = f"now() - INTERVAL %({date_param})s {date_interval}"
        recent_bound = f"now() - INTERVAL %({seq_date_param})s {seq_date_interval}"

        # Events should have been fired in the initial period...
        initial_period = f"countIf(timestamp <= {older_bound} AND {entity_query})"
        # ...then stopped in the following period...
        event_stopped_period = (
            f"countIf(timestamp > {older_bound} AND timestamp <= {recent_bound} AND {entity_query})"
        )
        # ...then restarted in the most recent period.
        event_restarted_period = (
            f"countIf(timestamp > {recent_bound} AND timestamp <= now() AND {entity_query})"
        )

        self._fields.append(
            f"({initial_period} > 0 AND {event_stopped_period} = 0 AND {event_restarted_period} > 0) as {column_name}"
        )

        params = {
            date_param: date_value,
            seq_date_param: seq_date_value,
            **entity_params,
        }
        return column_name, params
예제 #3
0
    def get_performed_event_regularly(
        self, prop: Property, prepend: str, idx: int
    ) -> Tuple[str, Dict[str, Any]]:
        """Register a field that is true when an event recurs across enough periods.

        The lookback is split into ``total_periods`` consecutive periods of
        ``time_value`` ``time_interval`` each, counted back from ``now()``.
        A period scores 1 when the number of matching events in it satisfies
        ``operator`` against ``operator_value``; the field is true when the
        scores sum to at least ``min_periods``. The expression is appended to
        ``self._fields``.

        Args:
            prop: Property holding the event (``event_type``/``key``), the
                period length, the count operator/value and the period counts.
            prepend: Prefix used to namespace the column and parameter names.
            idx: Index used to namespace the column and parameter names.

        Returns:
            ``(column_name, params)`` - the alias of the appended field and the
            query parameters it references (entity parameters plus the time,
            operator and min-period values).

        Raises:
            ValueError: If ``min_periods`` is greater than ``total_periods``.
                The validation helpers may raise as well.
        """
        entity_query, entity_params = self._get_entity(
            (prop.event_type, prop.key), prepend, idx
        )
        column_name = f"performed_event_regularly_{prepend}_{idx}"

        date_interval = validate_interval(prop.time_interval)

        time_value_param = f"{prepend}_time_value_{idx}"
        operator_value_param = f"{prepend}_operator_value_{idx}"
        min_periods_param = f"{prepend}_min_periods_{idx}"

        time_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
        operator_value = parse_and_validate_positive_integer(
            prop.operator_value, "operator_value"
        )
        min_period_count = parse_and_validate_positive_integer(
            prop.min_periods, "min_periods"
        )
        total_period_count = parse_and_validate_positive_integer(
            prop.total_periods, "total_periods"
        )

        if min_period_count > total_period_count:
            raise ValueError(
                f"min_periods ({min_period_count}) cannot be greater than total_periods ({total_period_count})"
            )

        def period_clause(period_index: int) -> str:
            # 1 if the event count inside this period satisfies the operator,
            # otherwise 0. Period 0 is the most recent one.
            window_start = f"%({time_value_param})s * {period_index}"
            window_end = f"%({time_value_param})s * ({period_index} + 1)"
            return (
                f"if(countIf({entity_query}"
                f" and timestamp <= now() - INTERVAL {window_start} {date_interval}"
                f" and timestamp > now() - INTERVAL {window_end} {date_interval})"
                f" {get_count_operator(prop.operator)} %({operator_value_param})s, 1, 0)"
            )

        period_scores = [period_clause(n) for n in range(total_period_count)]

        # The oldest period reaches total_periods * time_value intervals back.
        self._check_earliest_date((total_period_count * time_value, date_interval))

        summed_scores = "+".join(period_scores)
        self._fields.append(
            f"{summed_scores}>= %({min_periods_param})s as {column_name}"
        )

        own_params = {
            time_value_param: time_value,
            operator_value_param: operator_value,
            min_periods_param: min_period_count,
        }
        return column_name, {**entity_params, **own_params}
예제 #4
0
    def get_stopped_performing_event(
        self, prop: Property, prepend: str, idx: int
    ) -> Tuple[str, Dict[str, Any]]:
        """Register a field that is true when an event used to happen but has stopped.

        The generated condition requires both of:

        * at least one matching event between ``now() - time_value`` and
          ``now() - seq_time_value``;
        * no matching event after ``now() - seq_time_value``.

        The expression is appended to ``self._fields``.

        Args:
            prop: Property holding the event (``event_type``/``key``) and both
                windows (``time_value``/``time_interval`` and
                ``seq_time_value``/``seq_time_interval``).
            prepend: Prefix used to namespace the column and parameter names.
            idx: Index used to namespace the column and parameter names.

        Returns:
            ``(column_name, params)`` - the alias of the appended field and the
            query parameters it references (both window values plus the
            entity parameters).

        Raises:
            Whatever the validation helpers raise for invalid input
            (presumably ``ValueError`` - confirm against the helpers).
        """
        column_name = f"stopped_event_condition_{prepend}_{idx}"
        entity_query, entity_params = self._get_entity(
            (prop.event_type, prop.key), prepend, idx
        )

        date_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
        date_param = f"{prepend}_date_{idx}"
        date_interval = validate_interval(prop.time_interval)

        # Fix: label the value by the field it actually came from, so that a
        # validation failure points at seq_time_value rather than time_value.
        seq_date_value = parse_and_validate_positive_integer(
            prop.seq_time_value, "seq_time_value"
        )
        seq_date_param = f"{prepend}_seq_date_{idx}"
        seq_date_interval = validate_interval(prop.seq_time_interval)

        validate_seq_date_more_recent_than_date(
            (seq_date_value, seq_date_interval), (date_value, date_interval)
        )

        # The older window (time_value) is the earliest point this filter reads.
        self._check_earliest_date((date_value, date_interval))

        older_bound = f"now() - INTERVAL %({date_param})s {date_interval}"
        recent_bound = f"now() - INTERVAL %({seq_date_param})s {seq_date_interval}"

        # The user was doing the event in this time period...
        event_was_happening_period = (
            f"countIf(timestamp > {older_bound} AND timestamp <= {recent_bound} AND {entity_query})"
        )
        # ...then stopped in this time period.
        event_stopped_period = (
            f"countIf(timestamp > {recent_bound} AND timestamp <= now() AND {entity_query})"
        )

        self._fields.append(
            f"({event_was_happening_period} > 0 AND {event_stopped_period} = 0) as {column_name}"
        )

        params = {
            date_param: date_value,
            seq_date_param: seq_date_value,
            **entity_params,
        }
        return column_name, params
예제 #5
0
    def get_performed_event_first_time(
        self, prop: Property, prepend: str, idx: int
    ) -> Tuple[str, Dict[str, Any]]:
        """Register a field that is true when the first matching event is recent.

        The generated condition takes the earliest timestamp of the matching
        event and checks that it is no older than ``now() - time_value`` and
        earlier than ``now()``. The expression is appended to ``self._fields``.

        Args:
            prop: Property holding the event (``event_type``/``key``) and the
                window (``time_value``/``time_interval``).
            prepend: Prefix used to namespace the column and parameter names.
            idx: Index used to namespace the column and parameter names.

        Returns:
            ``(column_name, params)`` - the alias of the appended field and the
            query parameters it references (the window value plus the entity
            parameters).
        """
        entity_query, entity_params = self._get_entity(
            (prop.event_type, prop.key), prepend, idx
        )
        column_name = f"first_time_condition_{prepend}_{idx}"

        date_value = parse_and_validate_positive_integer(prop.time_value, "time_value")
        date_param = f"{prepend}_date_{idx}"
        date_interval = validate_interval(prop.time_interval)

        # The earliest occurrence is taken over all rows the query sees.
        # NOTE(review): presumably this flag stops the outer event query from
        # being limited to a recent time range - confirm where it is read.
        self._restrict_event_query_by_time = False

        first_seen = f"minIf(timestamp, {entity_query})"
        self._fields.append(
            f"{first_seen} >= now() - INTERVAL %({date_param})s {date_interval} AND {first_seen} < now() as {column_name}"
        )

        return column_name, {date_param: date_value, **entity_params}